dhisana 0.0.1.dev243__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/__init__.py +1 -0
- dhisana/cli/__init__.py +1 -0
- dhisana/cli/cli.py +20 -0
- dhisana/cli/datasets.py +27 -0
- dhisana/cli/models.py +26 -0
- dhisana/cli/predictions.py +20 -0
- dhisana/schemas/__init__.py +1 -0
- dhisana/schemas/common.py +399 -0
- dhisana/schemas/sales.py +965 -0
- dhisana/ui/__init__.py +1 -0
- dhisana/ui/components.py +472 -0
- dhisana/utils/__init__.py +1 -0
- dhisana/utils/add_mapping.py +352 -0
- dhisana/utils/agent_tools.py +51 -0
- dhisana/utils/apollo_tools.py +1597 -0
- dhisana/utils/assistant_tool_tag.py +4 -0
- dhisana/utils/built_with_api_tools.py +282 -0
- dhisana/utils/cache_output_tools.py +98 -0
- dhisana/utils/cache_output_tools_local.py +78 -0
- dhisana/utils/check_email_validity_tools.py +717 -0
- dhisana/utils/check_for_intent_signal.py +107 -0
- dhisana/utils/check_linkedin_url_validity.py +209 -0
- dhisana/utils/clay_tools.py +43 -0
- dhisana/utils/clean_properties.py +135 -0
- dhisana/utils/company_utils.py +60 -0
- dhisana/utils/compose_salesnav_query.py +259 -0
- dhisana/utils/compose_search_query.py +759 -0
- dhisana/utils/compose_three_step_workflow.py +234 -0
- dhisana/utils/composite_tools.py +137 -0
- dhisana/utils/dataframe_tools.py +237 -0
- dhisana/utils/domain_parser.py +45 -0
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_parse_helpers.py +132 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +933 -0
- dhisana/utils/extract_email_content_for_llm.py +101 -0
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +426 -0
- dhisana/utils/g2_tools.py +104 -0
- dhisana/utils/generate_content.py +41 -0
- dhisana/utils/generate_custom_message.py +271 -0
- dhisana/utils/generate_email.py +278 -0
- dhisana/utils/generate_email_response.py +465 -0
- dhisana/utils/generate_flow.py +102 -0
- dhisana/utils/generate_leads_salesnav.py +303 -0
- dhisana/utils/generate_linkedin_connect_message.py +224 -0
- dhisana/utils/generate_linkedin_response_message.py +317 -0
- dhisana/utils/generate_structured_output_internal.py +462 -0
- dhisana/utils/google_custom_search.py +267 -0
- dhisana/utils/google_oauth_tools.py +727 -0
- dhisana/utils/google_workspace_tools.py +1294 -0
- dhisana/utils/hubspot_clearbit.py +96 -0
- dhisana/utils/hubspot_crm_tools.py +2440 -0
- dhisana/utils/instantly_tools.py +149 -0
- dhisana/utils/linkedin_crawler.py +168 -0
- dhisana/utils/lusha_tools.py +333 -0
- dhisana/utils/mailgun_tools.py +156 -0
- dhisana/utils/mailreach_tools.py +123 -0
- dhisana/utils/microsoft365_tools.py +455 -0
- dhisana/utils/openai_assistant_and_file_utils.py +267 -0
- dhisana/utils/openai_helpers.py +977 -0
- dhisana/utils/openapi_spec_to_tools.py +45 -0
- dhisana/utils/openapi_tool/__init__.py +1 -0
- dhisana/utils/openapi_tool/api_models.py +633 -0
- dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
- dhisana/utils/openapi_tool/openapi_tool.py +319 -0
- dhisana/utils/parse_linkedin_messages_txt.py +100 -0
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +1226 -0
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/python_function_to_tools.py +83 -0
- dhisana/utils/research_lead.py +176 -0
- dhisana/utils/sales_navigator_crawler.py +1103 -0
- dhisana/utils/salesforce_crm_tools.py +477 -0
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +162 -0
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +852 -0
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +582 -0
- dhisana/utils/test_connect.py +2087 -0
- dhisana/utils/trasform_json.py +173 -0
- dhisana/utils/web_download_parse_tools.py +189 -0
- dhisana/utils/workflow_code_model.py +5 -0
- dhisana/utils/zoominfo_tools.py +357 -0
- dhisana/workflow/__init__.py +1 -0
- dhisana/workflow/agent.py +18 -0
- dhisana/workflow/flow.py +44 -0
- dhisana/workflow/task.py +43 -0
- dhisana/workflow/test.py +90 -0
- dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
- dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
- dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
- dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
- dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
dhisana/utils/openai_helpers.py
@@ -0,0 +1,977 @@
# Helper functions to call OpenAI Assistant

from datetime import datetime, timezone
import inspect
import os
import csv
import json
import hashlib
import asyncio
import logging
from typing import Any, Callable, Dict, List, Optional, Tuple, Type

from pydantic import BaseModel, Field, create_model
from fastapi import HTTPException
from openai import AsyncOpenAI, OpenAIError, pydantic_function_tool

from dhisana.utils import cache_output_tools
# from dhisana.utils.trasform_json import GLOBAL_GENERATED_PYTHON_CODE

from .agent_tools import GLOBAL_DATA_MODELS, GLOBAL_TOOLS_FUNCTIONS
from .google_workspace_tools import get_file_content_from_googledrive_by_name, write_content_to_googledrive
from .agent_tools import GLOBAL_OPENAI_ASSISTANT_TOOLS
from .openapi_spec_to_tools import (
    OPENAPI_TOOL_CONFIGURATIONS,
    OPENAPI_GLOBAL_OPENAI_ASSISTANT_TOOLS,
    OPENAPI_CALLABALE_FUNCTIONS,
)

# This file has functions to execute the agent workflow using provided spec and OpenAPI.
# Also has helper functions to extract structured data from the agent response.
# TODO: we need to enhance the Agent workflow handling.
# TODO: Move the OpenAI related helper functions to a separate file.

def get_openai_access_token(tool_config: Optional[List[Dict]] = None) -> str:
    """
    Retrieves the OPENAI_API_KEY access token from the provided tool configuration.

    Args:
        tool_config (list): A list of dictionaries containing the tool configuration.
            Each dictionary should have a "name" key and a "configuration" key,
            where "configuration" is a list of dictionaries containing "name" and "value" keys.

    Returns:
        str: The OPENAI_API_KEY access token.

    Raises:
        ValueError: If the OpenAI integration has not been configured.
    """
    if tool_config:
        openai_config = next(
            (item for item in tool_config if item.get("name") == "openai"), None
        )
        if openai_config:
            config_map = {
                item["name"]: item["value"]
                for item in openai_config.get("configuration", [])
                if item
            }
            OPENAI_API_KEY = config_map.get("apiKey")
        else:
            OPENAI_API_KEY = None
    else:
        OPENAI_API_KEY = None

    OPENAI_API_KEY = OPENAI_API_KEY or os.getenv("OPENAI_API_KEY")
    if not OPENAI_API_KEY:
        raise ValueError(
            "OpenAI integration is not configured. Please configure the connection to OpenAI in Integrations."
        )
    return OPENAI_API_KEY

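# --- Illustration only (not part of this package file) ---
# The tool_config shape that get_openai_access_token expects, per its docstring;
# the key value below is a hypothetical placeholder.
#
# example_tool_config = [
#     {
#         "name": "openai",
#         "configuration": [{"name": "apiKey", "value": "sk-..."}],
#     }
# ]
# api_key = get_openai_access_token(example_tool_config)
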
async def read_from_google_drive(path):
    return await get_file_content_from_googledrive_by_name(file_name=path)

# Function to get headers for OpenAPI tools
def get_headers(toolname):
    headers = OPENAPI_TOOL_CONFIGURATIONS.get(toolname, {}).get("headers", {})
    return headers


def get_params(toolname):
    params = OPENAPI_TOOL_CONFIGURATIONS.get(toolname, {}).get("params", {})
    return params


async def run_assistant(client, assistant, thread, prompt, response_type, allowed_tools):
    """
    Runs the assistant with the given parameters.
    """
    await send_initial_message(client, thread, prompt)
    allowed_tool_items = get_allowed_tool_items(allowed_tools)
    response_format = get_response_format(response_type)

    max_iterations = 5
    iteration_count = 0

    while iteration_count < max_iterations:
        run = await client.beta.threads.runs.create_and_poll(
            thread_id=thread.id,
            assistant_id=assistant.id,
            response_format=response_format,
            tools=allowed_tool_items,
        )

        while run.status == 'requires_action':
            if iteration_count >= max_iterations:
                logging.info("Exceeded maximum number of iterations for requires_action.")
                await client.beta.threads.runs.cancel(run_id=run.id, thread_id=thread.id)
                return "FAIL"

            tool_outputs = await handle_required_action(run)
            if tool_outputs:
                run = await submit_tool_outputs(client, thread, run, tool_outputs)
            else:
                break
            iteration_count += 1
            logging.info("Iteration count: %s", iteration_count)

        if run.status == 'completed':
            status = await handle_run_completion(client, thread, run)
            return status
        elif run.status == 'failed' and run.last_error.code == 'rate_limit_exceeded':
            logging.info("Rate limit exceeded. Retrying in 30 seconds...")
            await asyncio.sleep(30)
        elif run.status == 'expired':
            logging.info("Run expired. Creating a new run...")
        else:
            logging.info(f"Run status: {run.status}")
            return run.status

        iteration_count += 1
        if (iteration_count >= max_iterations):
            logging.info("Exceeded maximum number of iterations.")
            await client.beta.threads.runs.cancel(run_id=run.id, thread_id=thread.id)
            return 'FAIL'
        logging.info("Iteration count: %s", iteration_count)

    return "FAIL"

async def handle_run_completion(client, thread, run):
    if run.status == 'completed':
        messages = await client.beta.threads.messages.list(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    else:
        return run.status


async def send_initial_message(client, thread, prompt):
    await client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=prompt,
    )


def get_allowed_tool_items(allowed_tools):
    allowed_tool_items = [
        tool for tool in GLOBAL_OPENAI_ASSISTANT_TOOLS
        if tool['type'] == 'function' and tool['function']['name'] in allowed_tools
    ]
    allowed_tool_items.extend([
        tool for tool in OPENAPI_GLOBAL_OPENAI_ASSISTANT_TOOLS
        if tool['type'] == 'function' and tool['function']['name'] in allowed_tools
    ])
    return allowed_tool_items


def get_response_format(response_type):
    return {
        'type': 'json_schema',
        'json_schema': {
            "name": response_type.__name__,
            "schema": response_type.model_json_schema()
        }
    }


async def handle_required_action(run):
    tool_outputs = []
    current_batch_size = 0
    max_batch_size = 256 * 1024 # 256 KB
    logging.info(f"Handling required action")

    if hasattr(run, 'required_action') and hasattr(run.required_action, 'submit_tool_outputs'):
        for tool in run.required_action.submit_tool_outputs.tool_calls:
            function, openai_function = get_function(tool.function.name)
            if function:
                output_str, output_size = await invoke_function(function, tool, openai_function)
                if current_batch_size + output_size > max_batch_size:
                    tool_outputs.append(
                        {"tool_call_id": tool.id, "output": ""})
                else:
                    tool_outputs.append(
                        {"tool_call_id": tool.id, "output": output_str})
                    current_batch_size += output_size
            else:
                logging.info(f"Function {tool.function.name} not found.")
                tool_outputs.append(
                    {"tool_call_id": tool.id, "output": "No results found"})

    return tool_outputs


def get_function(function_name):
    function = GLOBAL_TOOLS_FUNCTIONS.get(function_name)
    openai_function = False
    if not function:
        function = OPENAPI_CALLABALE_FUNCTIONS.get(function_name)
        openai_function = True
    return function, openai_function


async def invoke_function(function, tool, openai_function):
    try:
        function_args = json.loads(tool.function.arguments)
        logging.info(f"Invoking function {tool.function.name} with args: {function_args}\n")

        if openai_function:
            output = await invoke_openapi_function(function, function_args, tool.function.name)
        else:
            if asyncio.iscoroutinefunction(function):
                output = await function(**function_args)
            else:
                output = function(**function_args)
        output_str = json.dumps(output)
        output_size = len(output_str.encode('utf-8'))
        logging.info(f"\nOutput from function {tool.function.name}: {output_str[:256]}\n")

        return output_str, output_size
    except Exception as e:
        logging.info(f"invoke_function Error invoking function {tool.function.name}: {e}")
        return "No results found", 0


async def invoke_openapi_function(function, function_args, function_name):

    json_body = function_args.get("json", None)
    path_params = function_args.get("path_params", None)
    fn_args = {"path_params": path_params, "data": json_body}
    headers = get_headers(function_name)

    query_params = function_args.get("params", {})
    params = get_params(function_name)
    query_params.update(params)
    status, reason, text = await function(
        name=function_name,
        fn_args=fn_args,
        headers=headers,
        params=query_params,
    )
    logging.info(f"\nOutput from function {function_name}: {status} {reason}\n")
    return {
        "status_code": status,
        "text": text,
        "reason": reason,
    }


async def submit_tool_outputs(client, thread, run, tool_outputs):
    try:
        return await client.beta.threads.runs.submit_tool_outputs_and_poll(
            thread_id=thread.id,
            run_id=run.id,
            tool_outputs=tool_outputs
        )
    except Exception as e:
        logging.info(f"Failed to submit tool outputs: ${e}")
        return run


async def handle_run_completion(client, thread, run):
    if run.status == 'completed':
        messages = await client.beta.threads.messages.list(thread_id=thread.id)
        return messages.data[0].content[0].text.value
    else:
        logging.info(f"Run status: {run.status}")
        return run.status



async def extract_and_structure_data(client, assistant, thread, prompt, task_inputs, response_type, allowed_tools):
    # Replace placeholders in the prompt with task inputs
    formatted_prompt = prompt
    for key, value in task_inputs.items():
        placeholder = "{{ inputs." + key + " }}"
        formatted_prompt = formatted_prompt.replace(placeholder, str(value))

    # Create a hash of the formatted prompt
    prompt_hash = hashlib.md5(formatted_prompt.encode()).hexdigest()

    # Retrieve cached response if available
    cached_response = cache_output_tools.retrieve_output("extract_and_structure_data", prompt_hash)
    if cached_response is not None:
        return cached_response

    # Run the assistant and cache the output if successful
    output = await run_assistant(client, assistant, thread, formatted_prompt, response_type, allowed_tools)
    if output and output != 'FAIL':
        cache_output_tools.cache_output("extract_and_structure_data", prompt_hash, output)

    return output

class RowItem(BaseModel):
    column_value: str

class GenericList(BaseModel):
    rows: List[RowItem]

def lookup_response_type(name: str):
    for model in GLOBAL_DATA_MODELS:
        if model.__name__ == name:
            return model
    return GenericList # Default response type


async def process_agent_request(row_batch: List[Dict], workflow: Dict, custom_instructions: str) -> List[Dict]:
    """
    Process agent request using the OpenAI client.
    """
    #TODO: handle timezone here.
    logging.getLogger("openai").setLevel(logging.WARNING)
    logging.getLogger("httpx").setLevel(logging.WARNING)

    todays_date = datetime.now(timezone.utc).isoformat()
    todays_day = datetime.now(timezone.utc).strftime('%d')
    instructions = f"Hi, You are an AI Assistant. Help the user with their tasks.\n\n Todays date is: {todays_date} Todays day is {todays_day} \n\n{custom_instructions}\n\n"
    try:
        client = AsyncOpenAI()
        assistant = await client.beta.assistants.create(
            name="AI Assistant",
            instructions=instructions,
            tools=[],
            model="gpt-5.1-chat"
        )
        thread = await client.beta.threads.create()
        parsed_outputs = []
        task_outputs = {} # Dictionary to store outputs of tasks
        input_list = {}
        input_list['initial_input_list'] = {
            "data": row_batch,
            "format": "list"
        }
        task_outputs['initial_input'] = input_list
        for task in workflow['tasks']:
            # Process each task
            task_outputs = await process_task(client, assistant, thread, task, task_outputs)
        # Collect the final output
        parsed_outputs.append(task_outputs)
        return parsed_outputs
    except Exception as e:
        logging.warning(f"process_agent_request An error occurred: {e}", exc_info=True)
        return [{"error": f"Error Processing Leads. process_agent_request process_agent_request An error occurred: {e}"}]
    finally:
        try:
            await client.beta.assistants.delete(assistant.id)
        except Exception as e:
            logging.info(f"Error deleting assistant: {e}")


async def process_task(client, assistant, thread, task, task_outputs):
    """
    Process a single task in the workflow.
    """
    # Prepare inputs
    task_inputs = await prepare_task_inputs(task, task_outputs)

    # Run the operation
    output = await run_task_operation(client, assistant, thread, task, task_inputs)

    # Store outputs
    await store_task_outputs(task, output, task_outputs)

    return task_outputs

async def read_csv_rows(file_path):
    rows = []
    with open(file_path, mode='r') as file:
        csv_reader = csv.reader(file)
        for row in csv_reader:
            rows.append(row)
    return rows

async def prepare_task_inputs(task, task_outputs):
    """
    Prepare the inputs for a task based on its input specifications.
    """
    inputs = task.get('inputs', {})
    task_inputs = {}
    for input_name, input_spec in inputs.items():
        source = input_spec.get('source', {})
        source_type = source.get('type', '')
        format = input_spec.get('format', 'list')
        if source_type == 'inline':
            # Get from inline source
            input_data = source.get('data')
        elif source_type == 'task_output':
            # Get from previous task output
            task_id = source.get('task_id')
            output_key = source.get('output_key')
            previous_task_output = task_outputs.get(task_id, {})
            if isinstance(previous_task_output, dict):
                output_item = previous_task_output.get(output_key)
                input_data = output_item['data']
            else:
                input_data = previous_task_output

            # Ensure input_data is a list
            if not isinstance(input_data, list):
                input_data = [input_data]
        elif source_type == 'google_drive':
            # Handle Google Drive source
            path = source.get('location')
            input_data_path = await read_from_google_drive(path)
            input_data = await read_csv_rows(input_data_path)
        elif source_type == 'local_path':
            # Handle local path source
            input_data_path = source.get('location')
            input_data = await read_csv_rows(input_data_path)
        else:
            input_data = None
        if input_data:
            task_inputs[input_name] = {
                "format": format,
                "data" : input_data
            }
    return task_inputs

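# --- Illustration only (not part of this package file) ---
# Input specs that prepare_task_inputs understands, based on the source-type
# branches above; the names, ids, and paths are hypothetical.
#
# example_inputs = {
#     "leads": {
#         "format": "list",
#         "source": {"type": "inline", "data": [{"email": "jane@example.com"}]},
#     },
#     "prior": {
#         "format": "list",
#         "source": {"type": "task_output", "task_id": "task_1", "output_key": "result"},
#     },
#     "from_file": {
#         "format": "list",
#         "source": {"type": "local_path", "location": "/tmp/leads.csv"},
#     },
# }
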
async def process_using_ai_assistant(prompt_template, task_inputs, client, assistant, thread, response_type, allowed_tools, task):
    outputs = []
    for key, value in task_inputs.items():
        format = value.get('format', 'list')
        items = value.get('data')
        if format == 'list':
            for item in items:
                formatted_prompt = prompt_template.replace(
                    "{{ inputs." + key + " }}", json.dumps(item)
                )
                # Run assistant with prompt
                logging.info(formatted_prompt)
                output = await extract_and_structure_data(
                    client, assistant, thread, formatted_prompt, task_inputs, response_type, allowed_tools
                )
                if output and output == 'FAIL':
                    pass
                output_json = None
                if isinstance(output, str):
                    try:
                        output_json = json.loads(output)
                    except json.JSONDecodeError:
                        pass
                if (
                    output_json
                    and isinstance(output_json, dict)
                    and 'data' in output_json
                    and isinstance(output_json['data'], list)
                ):
                    # Deserialize the JSON to responseType
                    items_deserialized = [response_type.parse_obj(item) for item in output_json['data']]
                    # Iterate over items_deserialized
                    for item in items_deserialized:
                        # Serialize each item back to JSON
                        serialized_item = json.dumps(item.dict())
                        outputs.append(serialized_item)
                elif output_json and isinstance(output_json, dict):
                    output_deserialized = response_type.parse_obj(output_json)
                    outputs.append(json.dumps(output_deserialized.dict()))
                else:
                    logging.warning("output_json is None or not a dict")
                if outputs and len(outputs) > 0:
                    interim_return_val = {
                        "data": outputs,
                        "format": "list"
                    }
                    await store_task_outputs_interim_checkpoint(task, interim_return_val, task_inputs)
        else:
            # Handle other formats if necessary
            pass
    return outputs

async def process_transform_json(task_inputs, response_type, task):
    outputs = []
    task_id = task.get('id')
    for input_name, input_info in task_inputs.items():
        data_format = input_info.get('format', 'list')
        input_info.get('transform_function_name', f"{task_id}_transform_input_json")
        items = input_info.get('data')
        if data_format == 'list':
            if items and len(items) > 0:
                # Generate the transformation function
                # if GLOBAL_GENERATED_PYTHON_CODE.get(transform_function_name, ''):
                #     transformation_function = GLOBAL_GENERATED_PYTHON_CODE[transform_function_name]
                # else:
                #     function_name = await transform_json_with_type(
                #         items[0],
                #         response_type,
                #         transform_function_name
                #     )
                #     transformation_function = GLOBAL_GENERATED_PYTHON_CODE[function_name]
                transformation_function = lambda x: x
                for item in items:
                    input_json = json.loads(item)
                    output_json = transformation_function(input_json)
                    output_deserialized = response_type.parse_obj(output_json)
                    outputs.append(json.dumps(output_deserialized.dict()))
                if outputs:
                    interim_return_val = {
                        "data": outputs,
                        "format": "list"
                    }
                    await store_task_outputs_interim_checkpoint(task, interim_return_val, task_inputs)
        else:
            # Handle other formats if necessary
            pass
    return outputs

async def process_function_call(operation, task_inputs, outputs):
    function_name = operation.get('function', '')
    args = operation.get('args', [])
    function = GLOBAL_TOOLS_FUNCTIONS.get(function_name)
    if function is None:
        raise Exception(f"Function {function_name} not found.")

    for key, value in task_inputs.items():
        format = value.get('format', 'list')
        items = value.get('data')
        item_parse_args_with_llm = operation.get('args_llm_parsed', 'False')
        if format == 'list':
            for item in items:
                # Prepare function keyword arguments
                if item_parse_args_with_llm == 'True':
                    function_kwargs, status = await get_function_call_arguments(
                        item, function_name
                    )
                    if status == 'FAIL':
                        continue
                else:
                    function_kwargs = {arg: item.get(arg, '') for arg in args}
                if asyncio.iscoroutinefunction(function):
                    output = await function(**function_kwargs)
                else:
                    output = function(**function_kwargs)
                process_output(output, outputs)
        else:
            # Prepare function arguments
            function_kwargs = {
                arg: task_inputs.get(arg, {}).get("data", '') for arg in args
            }
            if asyncio.iscoroutinefunction(function):
                output = await function(**function_kwargs)
            else:
                output = function(**function_kwargs)
            process_output(output, outputs)
    return outputs

def process_output(output, outputs):
    if isinstance(output, list):
        for item in output:
            if isinstance(item, dict):
                outputs.append(json.dumps(item))
            else:
                outputs.append(item)
    else:
        outputs.append(output)

async def run_task_operation(client, assistant, thread, task, task_inputs):
    """
    Execute the operation defined in the task.
    """
    operation = task.get('operation', {})
    operation_type = operation.get('type', '')
    allowed_tools = operation.get('allowed_tools', [])
    response_type_name = operation.get('response_type', 'GenericList')
    response_type = lookup_response_type(response_type_name)
    outputs = []

    if operation_type == 'ai_assistant_call':
        prompt_template = operation.get('prompt', '')
        outputs = await process_using_ai_assistant(
            prompt_template, task_inputs, client, assistant, thread, response_type, allowed_tools, task
        )
    elif operation_type == 'ai_transform_input_json':
        outputs = await process_transform_json(
            task_inputs, response_type, task
        )
    elif operation_type == 'python_callable':
        outputs = await process_function_call(operation, task_inputs, outputs)
    return {
        "data": outputs,
        "format": "list"
    }

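# --- Illustration only (not part of this package file) ---
# A task dict whose operation run_task_operation would dispatch to
# process_using_ai_assistant; the id, prompt, and data are hypothetical, while
# the operation types and keys come from the code above.
#
# example_task = {
#     "id": "summarize_leads",
#     "inputs": {"leads": {"format": "list", "source": {"type": "inline", "data": [{"email": "jane@example.com"}]}}},
#     "operation": {
#         "type": "ai_assistant_call",  # or 'ai_transform_input_json' / 'python_callable'
#         "prompt": "Summarize this lead: {{ inputs.leads }}",
#         "allowed_tools": [],
#         "response_type": "GenericList",
#     },
#     "outputs": {},
# }
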
async def store_task_outputs_interim_checkpoint(task, output, task_outputs):
    """
    Store the outputs of a task for use in subsequent tasks.
    """
    outputs = task.get('outputs', {})
    if outputs:
        for output_name, output_spec in outputs.items():
            destination = output_spec.get('destination', {})
            if destination:
                dest_type = destination.get('type')
                path_template = destination.get('path_template')
                if path_template:
                    current_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
                    current_timestamp = '_interim'
                    path = path_template.replace('{timestamp}', current_timestamp)
                    path = path.replace('{task_id}', task['id'])
                    local_path = path

                    if dest_type == 'google_drive':
                        local_path = os.path.join('/tmp', task['id'], path)

                    if dest_type == 'google_drive' or dest_type == 'local_path':
                        directory = os.path.dirname(local_path)
                        if directory and not os.path.exists(directory):
                            os.makedirs(directory)
                        logging.info(f"Writing output to {local_path}\n")

                        if output.get("format", "") == 'list':
                            data_list = []
                            for item in output.get("data", []):
                                try:
                                    data_list.append(json.loads(item))
                                except json.JSONDecodeError:
                                    # Handle or log the invalid JSON item
                                    pass
                            # Write the full list first with a 'full_list' prefix
                            def get_prefixed_path(file_path, prefix):
                                directory, filename = os.path.split(file_path)
                                name, ext = os.path.splitext(filename)
                                prefixed_filename = f"{prefix}_{name}{ext}"
                                return os.path.join(directory, prefixed_filename)

                            full_list_local_path = get_prefixed_path(local_path, 'full_list')
                            full_list_directory = os.path.dirname(full_list_local_path)
                            if full_list_directory and not os.path.exists(full_list_directory):
                                os.makedirs(full_list_directory)
                            logging.info(f"Writing full list output to {full_list_local_path}\n")

                            with open(full_list_local_path, 'w') as full_file:
                                if data_list and len(data_list) > 0:
                                    headers = [key for key in data_list[0].keys()]
                                    writer = csv.DictWriter(full_file, fieldnames=headers)
                                    writer.writeheader()
                                    for data in data_list:
                                        filtered_data = {key: value for key, value in data.items() if key in headers}
                                        writer.writerow(filtered_data)
                                else:
                                    writer = csv.DictWriter(full_file, fieldnames=[])
                                    writer.writeheader()
    return task_outputs

def filter_data_list(data_list, filter_by):
    """
    Filter the data_list based on conditions specified in filter_by.
    Supported operators: 'gt', 'lt', 'eq', 'gte', 'lte', 'ne'
    """
    from operator import gt, lt, eq, ge, le, ne

    operator_map = {
        'gt': gt,
        'lt': lt,
        'eq': eq,
        'gte': ge,
        'lte': le,
        'ne': ne
    }

    filtered_list = []
    for item in data_list:
        include_item = True
        for property_name, conditions in filter_by.items():
            value = item.get(property_name)
            if value is None or (isinstance(value, str) and value.strip() == ""):
                include_item = False
                break
            for op, compare_value in conditions.items():
                op_func = operator_map.get(op)
                if op_func is None:
                    continue # Unsupported operator
                try:
                    # Convert values to float for comparison if possible
                    item_value = float(value)
                    compare_value = float(compare_value)
                except (ValueError, TypeError):
                    item_value = value
                if not op_func(item_value, compare_value):
                    include_item = False
                    break
            if not include_item:
                break
        if include_item:
            filtered_list.append(item)
    return filtered_list

def convert_value(value):
    """
    Convert the value to the appropriate type for sorting.
    """
    if value is None or (isinstance(value, str) and value.strip() == ""):
        return ""
    try:
        return float(value)
    except ValueError:
        pass
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        pass
    return str(value)

def filter_and_sort(data_list, output_spec):
    """
    Filter and sort the data_list based on the output_spec.
    """
    required_properties = output_spec.get('required_properties', [])
    if required_properties:
        data_list = remove_empty_property_rows(data_list, required_properties)

    dedup_by = output_spec.get('deduplication_properties', [])
    if dedup_by:
        data_list = deduplicate_list_by_properties(data_list, dedup_by)

    sort_by_asc = output_spec.get('sort_by_asc', [])
    sort_by_desc = output_spec.get('sort_by_desc', [])

    # Combine sort fields with their corresponding order
    sort_fields = [(key, True) for key in sort_by_asc] + [(key, False) for key in sort_by_desc]
    logging.info(f"Sorting by: {sort_fields}")

    # Sort from least significant to most significant key
    for key, ascending in reversed(sort_fields):
        data_list = sorted(
            data_list,
            key=lambda x: convert_value(x.get(key)),
            reverse=not ascending
        )

    filter_by = output_spec.get('filter_by', {})
    if filter_by:
        data_list = filter_data_list(data_list, filter_by)

    return data_list

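# --- Illustration only (not part of this package file) ---
# An output_spec that filter_and_sort understands, combining the helpers above;
# the property names and threshold are hypothetical.
#
# example_output_spec = {
#     "required_properties": ["email"],
#     "deduplication_properties": ["email"],
#     "sort_by_desc": ["score"],
#     "filter_by": {"score": {"gte": 0.5}},
# }
# ranked = filter_and_sort(data_list, example_output_spec)
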
# Store the output of a task run.
async def store_task_outputs(task, output, task_outputs):
    """
    Store the outputs of a task for use in subsequent tasks.
    """
    outputs = task.get('outputs', {})
    if outputs:
        for output_name, output_spec in outputs.items():
            # Store output in task_outputs using task id and output_name
            if task['id'] not in task_outputs:
                task_outputs[task['id']] = {}

            destination = output_spec.get('destination', {})
            if destination:
                dest_type = destination.get('type')
                path_template = destination.get('path_template')
                if path_template:
                    current_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
                    path = path_template.replace('{timestamp}', current_timestamp)
                    path = path.replace('{task_id}', task['id'])
                    local_path = path

                    if dest_type == 'google_drive':
                        local_path = os.path.join('/tmp', task['id'], path)

                    if dest_type == 'google_drive' or dest_type == 'local_path':
                        directory = os.path.dirname(local_path)
                        if directory and not os.path.exists(directory):
                            os.makedirs(directory)
                        logging.info(f"Writing output to {local_path}\n")

                        if output.get("format", "") == 'list':
                            data_list = []
                            for item in output.get("data", []):
                                try:
                                    data_list.append(json.loads(item))
                                except json.JSONDecodeError:
                                    # Handle or log the invalid JSON item
                                    pass
                            logging.info(f"Total count: {len(data_list)}")

                            # Write the full list first with a 'full_list' prefix
                            def get_prefixed_path(file_path, prefix):
                                directory, filename = os.path.split(file_path)
                                name, ext = os.path.splitext(filename)
                                prefixed_filename = f"{prefix}_{name}{ext}"
                                return os.path.join(directory, prefixed_filename)

                            full_list_local_path = get_prefixed_path(local_path, 'full_list')
                            full_list_directory = os.path.dirname(full_list_local_path)
                            if full_list_directory and not os.path.exists(full_list_directory):
                                os.makedirs(full_list_directory)
                            logging.info(f"Writing full list output to {full_list_local_path}\n")

                            with open(full_list_local_path, 'w') as full_file:
                                if data_list and len(data_list) > 0:
                                    headers = [key for key in data_list[0].keys()]
                                    writer = csv.DictWriter(full_file, fieldnames=headers)
                                    writer.writeheader()
                                    for data in data_list:
                                        filtered_data = {key: value for key, value in data.items() if key in headers}
                                        writer.writerow(filtered_data)
                                else:
                                    writer = csv.DictWriter(full_file, fieldnames=[])
                                    writer.writeheader()

                            if data_list and len(data_list) > 0:
                                data_list = filter_and_sort(data_list, output_spec)
                                if data_list and len(data_list) > 0:
                                    logging.info(f"Deduped and removed count: {len(data_list)}")
                                    headers = [key for key, value in data_list[0].items() if isinstance(value, (str, int, float, bool))]
                                    with open(local_path, 'w') as file:
                                        writer = csv.DictWriter(file, fieldnames=headers)
                                        writer.writeheader()
                                        for data in data_list:
                                            filtered_data = {key: value for key, value in data.items() if key in headers}
                                            writer.writerow(filtered_data)
                                else:
                                    writer = csv.DictWriter(full_file, fieldnames=[])
                                    writer.writeheader()
                            else:
                                with open(local_path, 'w') as file:
                                    writer = csv.DictWriter(file, fieldnames=[])
                                    writer.writeheader()
                        else:
                            with open(local_path, 'w') as file:
                                file.write(str(output))
                    else:
                        pass
                if dest_type == 'google_drive':
                    await write_to_google_drive(path, local_path)

            task_outputs[task['id']][output_name] = output
    else:
        task_outputs[task['id']] = output

# Remove rows with None or empty values for the specified properties.
def remove_empty_property_rows(data_list, properties):
    """
    Remove rows with None or empty values for the specified properties.
    """
    filtered_list = []
    for item in data_list:
        empty = False
        for property_name in properties:
            value = item.get(property_name)
            if value is None or (isinstance(value, str) and value.strip() == ""):
                empty = True
                break
        if not empty:
            filtered_list.append(item)
    return filtered_list

# Deduplicate list by given input properties.
def deduplicate_list_by_properties(data_list, properties):
    """
    Deduplicate a list of dictionaries by a list of properties in order.
    Only deduplicate if the property value is not None or empty, strip spaces, and compare in lowercase.
    """
    for property_name in properties:
        seen = set()
        deduplicated_list = []
        for item in data_list:
            value = item.get(property_name)
            value = str(value or "").strip().lower()
            if value == "":
                deduplicated_list.append(item)
            elif value not in seen:
                seen.add(value)
                deduplicated_list.append(item)
        data_list = deduplicated_list
    return data_list

async def write_to_google_drive(cloud_path, local_path):
    # Placeholder function for writing to Google Drive
    await write_content_to_googledrive(cloud_path, local_path)
    logging.info(f"Writing to Google Drive at {cloud_path} {local_path}")


# Get a dynamic pyndantic model that corresponds to the function signature
def get_dynamic_model(function_name: str, function: Callable) -> Type[BaseModel]:
    """
    Dynamically creates a Pydantic BaseModel subclass based on the parameters of the given function.

    Args:
        function_name (str): The name of the function.
        function (Callable): The function object.

    Returns:
        Type[BaseModel]: A dynamically created Pydantic model class.
    """
    # Retrieve the function's signature
    signature = inspect.signature(function)
    fields = {}

    for param_name, param in signature.parameters.items():
        # Extract the parameter's type annotation
        annotation = param.annotation if param.annotation is not inspect.Parameter.empty else Any

        # Determine if the parameter has a default value
        if param.default is not inspect.Parameter.empty:
            default_value = param.default
        else:
            default_value = ...

        # Create a Field with a description
        field_info = Field(
            default=default_value,
            description=f"Parameter '{param_name}' of type '{annotation.__name__}'"
        )

        # Add the field to the fields dictionary
        fields[param_name] = (annotation, field_info)

    # Create and return the dynamic model
    dynamic_model = create_model(
        f"{function_name}_Arguments",
        __base__=BaseModel,
        **fields
    )

    return dynamic_model

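# --- Illustration only (not part of this package file) ---
# How get_dynamic_model maps a plain function signature onto a Pydantic model;
# the function below is hypothetical.
#
# def score_lead(email: str, company: str = "unknown") -> dict:
#     ...
#
# LeadArgs = get_dynamic_model("score_lead", score_lead)  # model named "score_lead_Arguments"
# LeadArgs(email="jane@example.com")  # "company" falls back to its default
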
# Given a function definition and input string extract the function call arguments using OpenAI API
async def get_function_call_arguments(input_text: str, function_name: str) -> Tuple[Dict[str, Any], str]:
    """
    Extracts function call arguments from the input text using OpenAI's API.

    Args:
        input_text (str): The input text containing the function call.
        function_name (str): The name of the function to extract arguments for.

    Returns:
        Tuple[Dict[str, Any], str]: A tuple containing the function arguments as a dictionary and a status message.
    """
    try:
        # Retrieve the function and its parameters
        function, _ = get_function(function_name)

        # Generate a dynamic Pydantic model based on the function's parameters
        dynamic_model = get_dynamic_model(function_name, function)

        # Define the tool using the dynamic model
        tool = pydantic_function_tool(dynamic_model)

        # Construct the prompt
        prompt = f"Extract the arguments for the function '{function_name}' from the following input:\n\n{input_text}"

        # Initialize the OpenAI client
        client = AsyncOpenAI()

        # Make the API call
        response = await client.beta.chat.completions.parse(
            model="gpt-5.1-chat",
            messages=[
                {"role": "system", "content": "Extract function arguments in JSON format."},
                {"role": "user", "content": prompt},
            ],
            tools=[tool],
            response_format=dynamic_model
        )

        # Extract the function call arguments from the response
        parsed_output = vars(response.choices[0].message.parsed)
        return parsed_output, 'SUCCESS'

    except OpenAIError as e:
        logging.error(f"OpenAI API error: {e}")
        raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail="An unexpected error occurred while processing your request.")

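Taken together, these helpers are driven by process_agent_request. Below is a minimal usage sketch, assuming the hunk above is dhisana/utils/openai_helpers.py (per the file listing) and using the workflow/task dictionary shapes inferred from the code; the task id, prompt, row data, and output path are hypothetical, and OPENAI_API_KEY is expected in the environment.

import asyncio

from dhisana.utils.openai_helpers import process_agent_request

# A one-task workflow: feed the initial row batch to an assistant call and
# write the structured result to a local CSV (paths and ids are placeholders).
workflow = {
    "tasks": [
        {
            "id": "summarize_leads",
            "inputs": {
                "leads": {
                    "format": "list",
                    "source": {
                        "type": "task_output",
                        "task_id": "initial_input",
                        "output_key": "initial_input_list",
                    },
                },
            },
            "operation": {
                "type": "ai_assistant_call",
                "prompt": "Summarize this lead as JSON: {{ inputs.leads }}",
                "allowed_tools": [],
                "response_type": "GenericList",
            },
            "outputs": {
                "summary": {
                    "destination": {
                        "type": "local_path",
                        "path_template": "/tmp/{task_id}_{timestamp}.csv",
                    },
                },
            },
        },
    ],
}

rows = [{"full_name": "Jane Doe", "company": "Example Corp"}]
results = asyncio.run(process_agent_request(rows, workflow, custom_instructions=""))
print(results)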