dhisana-0.0.1.dev243-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. dhisana/__init__.py +1 -0
  2. dhisana/cli/__init__.py +1 -0
  3. dhisana/cli/cli.py +20 -0
  4. dhisana/cli/datasets.py +27 -0
  5. dhisana/cli/models.py +26 -0
  6. dhisana/cli/predictions.py +20 -0
  7. dhisana/schemas/__init__.py +1 -0
  8. dhisana/schemas/common.py +399 -0
  9. dhisana/schemas/sales.py +965 -0
  10. dhisana/ui/__init__.py +1 -0
  11. dhisana/ui/components.py +472 -0
  12. dhisana/utils/__init__.py +1 -0
  13. dhisana/utils/add_mapping.py +352 -0
  14. dhisana/utils/agent_tools.py +51 -0
  15. dhisana/utils/apollo_tools.py +1597 -0
  16. dhisana/utils/assistant_tool_tag.py +4 -0
  17. dhisana/utils/built_with_api_tools.py +282 -0
  18. dhisana/utils/cache_output_tools.py +98 -0
  19. dhisana/utils/cache_output_tools_local.py +78 -0
  20. dhisana/utils/check_email_validity_tools.py +717 -0
  21. dhisana/utils/check_for_intent_signal.py +107 -0
  22. dhisana/utils/check_linkedin_url_validity.py +209 -0
  23. dhisana/utils/clay_tools.py +43 -0
  24. dhisana/utils/clean_properties.py +135 -0
  25. dhisana/utils/company_utils.py +60 -0
  26. dhisana/utils/compose_salesnav_query.py +259 -0
  27. dhisana/utils/compose_search_query.py +759 -0
  28. dhisana/utils/compose_three_step_workflow.py +234 -0
  29. dhisana/utils/composite_tools.py +137 -0
  30. dhisana/utils/dataframe_tools.py +237 -0
  31. dhisana/utils/domain_parser.py +45 -0
  32. dhisana/utils/email_body_utils.py +72 -0
  33. dhisana/utils/email_parse_helpers.py +132 -0
  34. dhisana/utils/email_provider.py +375 -0
  35. dhisana/utils/enrich_lead_information.py +933 -0
  36. dhisana/utils/extract_email_content_for_llm.py +101 -0
  37. dhisana/utils/fetch_openai_config.py +129 -0
  38. dhisana/utils/field_validators.py +426 -0
  39. dhisana/utils/g2_tools.py +104 -0
  40. dhisana/utils/generate_content.py +41 -0
  41. dhisana/utils/generate_custom_message.py +271 -0
  42. dhisana/utils/generate_email.py +278 -0
  43. dhisana/utils/generate_email_response.py +465 -0
  44. dhisana/utils/generate_flow.py +102 -0
  45. dhisana/utils/generate_leads_salesnav.py +303 -0
  46. dhisana/utils/generate_linkedin_connect_message.py +224 -0
  47. dhisana/utils/generate_linkedin_response_message.py +317 -0
  48. dhisana/utils/generate_structured_output_internal.py +462 -0
  49. dhisana/utils/google_custom_search.py +267 -0
  50. dhisana/utils/google_oauth_tools.py +727 -0
  51. dhisana/utils/google_workspace_tools.py +1294 -0
  52. dhisana/utils/hubspot_clearbit.py +96 -0
  53. dhisana/utils/hubspot_crm_tools.py +2440 -0
  54. dhisana/utils/instantly_tools.py +149 -0
  55. dhisana/utils/linkedin_crawler.py +168 -0
  56. dhisana/utils/lusha_tools.py +333 -0
  57. dhisana/utils/mailgun_tools.py +156 -0
  58. dhisana/utils/mailreach_tools.py +123 -0
  59. dhisana/utils/microsoft365_tools.py +455 -0
  60. dhisana/utils/openai_assistant_and_file_utils.py +267 -0
  61. dhisana/utils/openai_helpers.py +977 -0
  62. dhisana/utils/openapi_spec_to_tools.py +45 -0
  63. dhisana/utils/openapi_tool/__init__.py +1 -0
  64. dhisana/utils/openapi_tool/api_models.py +633 -0
  65. dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
  66. dhisana/utils/openapi_tool/openapi_tool.py +319 -0
  67. dhisana/utils/parse_linkedin_messages_txt.py +100 -0
  68. dhisana/utils/profile.py +37 -0
  69. dhisana/utils/proxy_curl_tools.py +1226 -0
  70. dhisana/utils/proxycurl_search_leads.py +426 -0
  71. dhisana/utils/python_function_to_tools.py +83 -0
  72. dhisana/utils/research_lead.py +176 -0
  73. dhisana/utils/sales_navigator_crawler.py +1103 -0
  74. dhisana/utils/salesforce_crm_tools.py +477 -0
  75. dhisana/utils/search_router.py +131 -0
  76. dhisana/utils/search_router_jobs.py +51 -0
  77. dhisana/utils/sendgrid_tools.py +162 -0
  78. dhisana/utils/serarch_router_local_business.py +75 -0
  79. dhisana/utils/serpapi_additional_tools.py +290 -0
  80. dhisana/utils/serpapi_google_jobs.py +117 -0
  81. dhisana/utils/serpapi_google_search.py +188 -0
  82. dhisana/utils/serpapi_local_business_search.py +129 -0
  83. dhisana/utils/serpapi_search_tools.py +852 -0
  84. dhisana/utils/serperdev_google_jobs.py +125 -0
  85. dhisana/utils/serperdev_local_business.py +154 -0
  86. dhisana/utils/serperdev_search.py +233 -0
  87. dhisana/utils/smtp_email_tools.py +582 -0
  88. dhisana/utils/test_connect.py +2087 -0
  89. dhisana/utils/trasform_json.py +173 -0
  90. dhisana/utils/web_download_parse_tools.py +189 -0
  91. dhisana/utils/workflow_code_model.py +5 -0
  92. dhisana/utils/zoominfo_tools.py +357 -0
  93. dhisana/workflow/__init__.py +1 -0
  94. dhisana/workflow/agent.py +18 -0
  95. dhisana/workflow/flow.py +44 -0
  96. dhisana/workflow/task.py +43 -0
  97. dhisana/workflow/test.py +90 -0
  98. dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
  99. dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
  100. dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
  101. dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
  102. dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
@@ -0,0 +1,977 @@
+ # Helper functions to call OpenAI Assistant
+
+ from datetime import datetime, timezone
+ import inspect
+ import os
+ import csv
+ import json
+ import hashlib
+ import asyncio
+ import logging
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Type
+
+ from pydantic import BaseModel, Field, create_model
+ from fastapi import HTTPException
+ from openai import AsyncOpenAI, OpenAIError, pydantic_function_tool
+
+ from dhisana.utils import cache_output_tools
+ # from dhisana.utils.trasform_json import GLOBAL_GENERATED_PYTHON_CODE
+
+ from .agent_tools import GLOBAL_DATA_MODELS, GLOBAL_TOOLS_FUNCTIONS
+ from .google_workspace_tools import get_file_content_from_googledrive_by_name, write_content_to_googledrive
+ from .agent_tools import GLOBAL_OPENAI_ASSISTANT_TOOLS
+ from .openapi_spec_to_tools import (
+     OPENAPI_TOOL_CONFIGURATIONS,
+     OPENAPI_GLOBAL_OPENAI_ASSISTANT_TOOLS,
+     OPENAPI_CALLABALE_FUNCTIONS,
+ )
+
+ # This file has functions to execute the agent workflow using the provided spec and OpenAPI tools.
+ # It also has helper functions to extract structured data from the agent response.
+ # TODO: we need to enhance the Agent workflow handling.
+ # TODO: Move the OpenAI-related helper functions to a separate file.
+
+ def get_openai_access_token(tool_config: Optional[List[Dict]] = None) -> str:
+     """
+     Retrieves the OPENAI_API_KEY access token from the provided tool configuration.
+
+     Args:
+         tool_config (list): A list of dictionaries containing the tool configuration.
+             Each dictionary should have a "name" key and a "configuration" key,
+             where "configuration" is a list of dictionaries containing "name" and "value" keys.
+
+     Returns:
+         str: The OPENAI_API_KEY access token.
+
+     Raises:
+         ValueError: If the OpenAI integration has not been configured.
+     """
+     if tool_config:
+         openai_config = next(
+             (item for item in tool_config if item.get("name") == "openai"), None
+         )
+         if openai_config:
+             config_map = {
+                 item["name"]: item["value"]
+                 for item in openai_config.get("configuration", [])
+                 if item
+             }
+             OPENAI_API_KEY = config_map.get("apiKey")
+         else:
+             OPENAI_API_KEY = None
+     else:
+         OPENAI_API_KEY = None
+
+     OPENAI_API_KEY = OPENAI_API_KEY or os.getenv("OPENAI_API_KEY")
+     if not OPENAI_API_KEY:
+         raise ValueError(
+             "OpenAI integration is not configured. Please configure the connection to OpenAI in Integrations."
+         )
+     return OPENAI_API_KEY
+
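For reference, a minimal sketch of the tool_config shape this lookup expects; the "openai" and "apiKey" names mirror the code above, and the key value is illustrative:

    # Illustrative config; only the "openai"/"apiKey" names are significant.
    tool_config = [
        {
            "name": "openai",
            "configuration": [
                {"name": "apiKey", "value": "sk-example"},
            ],
        }
    ]
    # get_openai_access_token(tool_config) -> "sk-example".
    # With no matching entry it falls back to the OPENAI_API_KEY env var.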
+ async def read_from_google_drive(path):
+     return await get_file_content_from_googledrive_by_name(file_name=path)
+
+ # Function to get headers for OpenAPI tools
+ def get_headers(toolname):
+     headers = OPENAPI_TOOL_CONFIGURATIONS.get(toolname, {}).get("headers", {})
+     return headers
+
+
+ def get_params(toolname):
+     params = OPENAPI_TOOL_CONFIGURATIONS.get(toolname, {}).get("params", {})
+     return params
+
+
+ async def run_assistant(client, assistant, thread, prompt, response_type, allowed_tools):
+     """
+     Runs the assistant with the given parameters.
+     """
+     await send_initial_message(client, thread, prompt)
+     allowed_tool_items = get_allowed_tool_items(allowed_tools)
+     response_format = get_response_format(response_type)
+
+     max_iterations = 5
+     iteration_count = 0
+
+     while iteration_count < max_iterations:
+         run = await client.beta.threads.runs.create_and_poll(
+             thread_id=thread.id,
+             assistant_id=assistant.id,
+             response_format=response_format,
+             tools=allowed_tool_items,
+         )
+
+         while run.status == 'requires_action':
+             if iteration_count >= max_iterations:
+                 logging.info("Exceeded maximum number of iterations for requires_action.")
+                 await client.beta.threads.runs.cancel(run_id=run.id, thread_id=thread.id)
+                 return "FAIL"
+
+             tool_outputs = await handle_required_action(run)
+             if tool_outputs:
+                 run = await submit_tool_outputs(client, thread, run, tool_outputs)
+             else:
+                 break
+             iteration_count += 1
+             logging.info("Iteration count: %s", iteration_count)
+
+         if run.status == 'completed':
+             status = await handle_run_completion(client, thread, run)
+             return status
+         elif run.status == 'failed' and run.last_error and run.last_error.code == 'rate_limit_exceeded':
+             logging.info("Rate limit exceeded. Retrying in 30 seconds...")
+             await asyncio.sleep(30)
+         elif run.status == 'expired':
+             logging.info("Run expired. Creating a new run...")
+         else:
+             logging.info(f"Run status: {run.status}")
+             return run.status
+
+         iteration_count += 1
+         if iteration_count >= max_iterations:
+             logging.info("Exceeded maximum number of iterations.")
+             await client.beta.threads.runs.cancel(run_id=run.id, thread_id=thread.id)
+             return 'FAIL'
+         logging.info("Iteration count: %s", iteration_count)
+
+     return "FAIL"
+
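A hypothetical call sketch for the loop above; the response type and tool name are invented for illustration, while client, assistant, and thread come from the OpenAI Assistants beta API used throughout this file:

    from pydantic import BaseModel

    class LeadSummary(BaseModel):  # hypothetical response type
        name: str
        score: float

    # result = await run_assistant(
    #     client, assistant, thread,
    #     prompt="Summarize this lead ...",
    #     response_type=LeadSummary,
    #     allowed_tools=["search_google_custom"],  # hypothetical registered tool name
    # )
    # result is either the assistant's JSON text, a terminal run status, or "FAIL".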
+ async def send_initial_message(client, thread, prompt):
+     await client.beta.threads.messages.create(
+         thread_id=thread.id,
+         role="user",
+         content=prompt,
+     )
+
+
+ def get_allowed_tool_items(allowed_tools):
+     allowed_tool_items = [
+         tool for tool in GLOBAL_OPENAI_ASSISTANT_TOOLS
+         if tool['type'] == 'function' and tool['function']['name'] in allowed_tools
+     ]
+     allowed_tool_items.extend([
+         tool for tool in OPENAPI_GLOBAL_OPENAI_ASSISTANT_TOOLS
+         if tool['type'] == 'function' and tool['function']['name'] in allowed_tools
+     ])
+     return allowed_tool_items
+
+
+ def get_response_format(response_type):
+     return {
+         'type': 'json_schema',
+         'json_schema': {
+             "name": response_type.__name__,
+             "schema": response_type.model_json_schema()
+         }
+     }
+
+
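For a small model, get_response_format yields an Assistants-style json_schema payload; a sketch of the result for the RowItem model defined later in this file:

    # get_response_format(RowItem) returns, roughly:
    # {
    #     'type': 'json_schema',
    #     'json_schema': {
    #         'name': 'RowItem',
    #         'schema': {...}  # RowItem.model_json_schema(): properties, required, etc.
    #     }
    # }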
+ async def handle_required_action(run):
+     tool_outputs = []
+     current_batch_size = 0
+     max_batch_size = 256 * 1024  # 256 KB
+     logging.info("Handling required action")
+
+     if hasattr(run, 'required_action') and hasattr(run.required_action, 'submit_tool_outputs'):
+         for tool in run.required_action.submit_tool_outputs.tool_calls:
+             function, openai_function = get_function(tool.function.name)
+             if function:
+                 output_str, output_size = await invoke_function(function, tool, openai_function)
+                 if current_batch_size + output_size > max_batch_size:
+                     tool_outputs.append(
+                         {"tool_call_id": tool.id, "output": ""})
+                 else:
+                     tool_outputs.append(
+                         {"tool_call_id": tool.id, "output": output_str})
+                     current_batch_size += output_size
+             else:
+                 logging.info(f"Function {tool.function.name} not found.")
+                 tool_outputs.append(
+                     {"tool_call_id": tool.id, "output": "No results found"})
+
+     return tool_outputs
+
+
+ def get_function(function_name):
+     function = GLOBAL_TOOLS_FUNCTIONS.get(function_name)
+     openai_function = False
+     if not function:
+         function = OPENAPI_CALLABALE_FUNCTIONS.get(function_name)
+         openai_function = True
+     return function, openai_function
+
+
+ async def invoke_function(function, tool, openai_function):
+     try:
+         function_args = json.loads(tool.function.arguments)
+         logging.info(f"Invoking function {tool.function.name} with args: {function_args}\n")
+
+         if openai_function:
+             output = await invoke_openapi_function(function, function_args, tool.function.name)
+         else:
+             if asyncio.iscoroutinefunction(function):
+                 output = await function(**function_args)
+             else:
+                 output = function(**function_args)
+         output_str = json.dumps(output)
+         output_size = len(output_str.encode('utf-8'))
+         logging.info(f"\nOutput from function {tool.function.name}: {output_str[:256]}\n")
+
+         return output_str, output_size
+     except Exception as e:
+         logging.info(f"invoke_function Error invoking function {tool.function.name}: {e}")
+         return "No results found", 0
+
+
+ async def invoke_openapi_function(function, function_args, function_name):
+     json_body = function_args.get("json", None)
+     path_params = function_args.get("path_params", None)
+     fn_args = {"path_params": path_params, "data": json_body}
+     headers = get_headers(function_name)
+
+     query_params = function_args.get("params", {})
+     params = get_params(function_name)
+     query_params.update(params)
+     status, reason, text = await function(
+         name=function_name,
+         fn_args=fn_args,
+         headers=headers,
+         params=query_params,
+     )
+     logging.info(f"\nOutput from function {function_name}: {status} {reason}\n")
+     return {
+         "status_code": status,
+         "text": text,
+         "reason": reason,
+     }
+
+
+ async def submit_tool_outputs(client, thread, run, tool_outputs):
+     try:
+         return await client.beta.threads.runs.submit_tool_outputs_and_poll(
+             thread_id=thread.id,
+             run_id=run.id,
+             tool_outputs=tool_outputs
+         )
+     except Exception as e:
+         logging.info(f"Failed to submit tool outputs: {e}")
+         return run
+
+
+ async def handle_run_completion(client, thread, run):
+     if run.status == 'completed':
+         messages = await client.beta.threads.messages.list(thread_id=thread.id)
+         return messages.data[0].content[0].text.value
+     else:
+         logging.info(f"Run status: {run.status}")
+         return run.status
+
+
+ async def extract_and_structure_data(client, assistant, thread, prompt, task_inputs, response_type, allowed_tools):
+     # Replace placeholders in the prompt with task inputs
+     formatted_prompt = prompt
+     for key, value in task_inputs.items():
+         placeholder = "{{ inputs." + key + " }}"
+         formatted_prompt = formatted_prompt.replace(placeholder, str(value))
+
+     # Create a hash of the formatted prompt
+     prompt_hash = hashlib.md5(formatted_prompt.encode()).hexdigest()
+
+     # Retrieve cached response if available
+     cached_response = cache_output_tools.retrieve_output("extract_and_structure_data", prompt_hash)
+     if cached_response is not None:
+         return cached_response
+
+     # Run the assistant and cache the output if successful
+     output = await run_assistant(client, assistant, thread, formatted_prompt, response_type, allowed_tools)
+     if output and output != 'FAIL':
+         cache_output_tools.cache_output("extract_and_structure_data", prompt_hash, output)
+
+     return output
+
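The placeholder convention is literal: each `{{ inputs.<key> }}` token in the prompt is replaced with str(value), and the md5 of the resulting prompt keys the cache. A small illustration (values are made up):

    prompt = "Research {{ inputs.company }} and list three facts."
    task_inputs = {"company": "Acme Corp"}
    # After substitution: "Research Acme Corp and list three facts."
    # hashlib.md5(formatted_prompt.encode()).hexdigest() becomes the cache key,
    # so identical prompts are served from cache_output_tools on repeat runs.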
+ class RowItem(BaseModel):
+     column_value: str
+
+ class GenericList(BaseModel):
+     rows: List[RowItem]
+
+ def lookup_response_type(name: str):
+     for model in GLOBAL_DATA_MODELS:
+         if model.__name__ == name:
+             return model
+     return GenericList  # Default response type
+
+
+ async def process_agent_request(row_batch: List[Dict], workflow: Dict, custom_instructions: str) -> List[Dict]:
+     """
+     Process agent request using the OpenAI client.
+     """
+     # TODO: handle timezone here.
+     logging.getLogger("openai").setLevel(logging.WARNING)
+     logging.getLogger("httpx").setLevel(logging.WARNING)
+
+     todays_date = datetime.now(timezone.utc).isoformat()
+     todays_day = datetime.now(timezone.utc).strftime('%d')
+     instructions = f"Hi, You are an AI Assistant. Help the user with their tasks.\n\nToday's date is: {todays_date}. Today's day is {todays_day}.\n\n{custom_instructions}\n\n"
+     assistant = None
+     try:
+         client = AsyncOpenAI()
+         assistant = await client.beta.assistants.create(
+             name="AI Assistant",
+             instructions=instructions,
+             tools=[],
+             model="gpt-5.1-chat"
+         )
+         thread = await client.beta.threads.create()
+         parsed_outputs = []
+         task_outputs = {}  # Dictionary to store outputs of tasks
+         input_list = {}
+         input_list['initial_input_list'] = {
+             "data": row_batch,
+             "format": "list"
+         }
+         task_outputs['initial_input'] = input_list
+         for task in workflow['tasks']:
+             # Process each task
+             task_outputs = await process_task(client, assistant, thread, task, task_outputs)
+         # Collect the final output
+         parsed_outputs.append(task_outputs)
+         return parsed_outputs
+     except Exception as e:
+         logging.warning(f"process_agent_request An error occurred: {e}", exc_info=True)
+         return [{"error": f"Error Processing Leads. process_agent_request An error occurred: {e}"}]
+     finally:
+         try:
+             if assistant:
+                 await client.beta.assistants.delete(assistant.id)
+         except Exception as e:
+             logging.info(f"Error deleting assistant: {e}")
+
+
+ async def process_task(client, assistant, thread, task, task_outputs):
+     """
+     Process a single task in the workflow.
+     """
+     # Prepare inputs
+     task_inputs = await prepare_task_inputs(task, task_outputs)
+
+     # Run the operation
+     output = await run_task_operation(client, assistant, thread, task, task_inputs)
+
+     # Store outputs
+     await store_task_outputs(task, output, task_outputs)
+
+     return task_outputs
+
+ async def read_csv_rows(file_path):
+     rows = []
+     with open(file_path, mode='r') as file:
+         csv_reader = csv.reader(file)
+         for row in csv_reader:
+             rows.append(row)
+     return rows
+
+ async def prepare_task_inputs(task, task_outputs):
+     """
+     Prepare the inputs for a task based on its input specifications.
+     """
+     inputs = task.get('inputs', {})
+     task_inputs = {}
+     for input_name, input_spec in inputs.items():
+         source = input_spec.get('source', {})
+         source_type = source.get('type', '')
+         format = input_spec.get('format', 'list')
+         if source_type == 'inline':
+             # Get from inline source
+             input_data = source.get('data')
+         elif source_type == 'task_output':
+             # Get from previous task output
+             task_id = source.get('task_id')
+             output_key = source.get('output_key')
+             previous_task_output = task_outputs.get(task_id, {})
+             if isinstance(previous_task_output, dict):
+                 output_item = previous_task_output.get(output_key)
+                 input_data = output_item['data'] if output_item else None
+             else:
+                 input_data = previous_task_output
+
+             # Ensure input_data is a list
+             if not isinstance(input_data, list):
+                 input_data = [input_data]
+         elif source_type == 'google_drive':
+             # Handle Google Drive source
+             path = source.get('location')
+             input_data_path = await read_from_google_drive(path)
+             input_data = await read_csv_rows(input_data_path)
+         elif source_type == 'local_path':
+             # Handle local path source
+             input_data_path = source.get('location')
+             input_data = await read_csv_rows(input_data_path)
+         else:
+             input_data = None
+         if input_data:
+             task_inputs[input_name] = {
+                 "format": format,
+                 "data": input_data
+             }
+     return task_inputs
+
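A sketch of the task 'inputs' shape this function consumes; the key names come from the .get() lookups above, while the values are illustrative:

    task = {
        "inputs": {
            "seed_rows": {
                "format": "list",
                "source": {"type": "inline", "data": [{"name": "Acme"}]},
            },
            "leads": {
                "format": "list",
                "source": {
                    "type": "task_output",
                    "task_id": "step_1",       # id of an earlier task
                    "output_key": "out_list",  # output name stored by that task
                },
            },
        }
    }
    # prepare_task_inputs(task, task_outputs) returns, per input name,
    # {"format": "list", "data": [...]}.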
+ async def process_using_ai_assistant(prompt_template, task_inputs, client, assistant, thread, response_type, allowed_tools, task):
+     outputs = []
+     for key, value in task_inputs.items():
+         format = value.get('format', 'list')
+         items = value.get('data')
+         if format == 'list':
+             for item in items:
+                 formatted_prompt = prompt_template.replace(
+                     "{{ inputs." + key + " }}", json.dumps(item)
+                 )
+                 # Run assistant with prompt
+                 logging.info(formatted_prompt)
+                 output = await extract_and_structure_data(
+                     client, assistant, thread, formatted_prompt, task_inputs, response_type, allowed_tools
+                 )
+                 if output == 'FAIL':
+                     continue
+                 output_json = None
+                 if isinstance(output, str):
+                     try:
+                         output_json = json.loads(output)
+                     except json.JSONDecodeError:
+                         pass
+                 if (
+                     output_json
+                     and isinstance(output_json, dict)
+                     and 'data' in output_json
+                     and isinstance(output_json['data'], list)
+                 ):
+                     # Deserialize the JSON to response_type
+                     items_deserialized = [response_type.parse_obj(item) for item in output_json['data']]
+                     # Serialize each item back to JSON
+                     for item in items_deserialized:
+                         serialized_item = json.dumps(item.dict())
+                         outputs.append(serialized_item)
+                 elif output_json and isinstance(output_json, dict):
+                     output_deserialized = response_type.parse_obj(output_json)
+                     outputs.append(json.dumps(output_deserialized.dict()))
+                 else:
+                     logging.warning("output_json is None or not a dict")
+             if outputs:
+                 interim_return_val = {
+                     "data": outputs,
+                     "format": "list"
+                 }
+                 await store_task_outputs_interim_checkpoint(task, interim_return_val, task_inputs)
+         else:
+             # Handle other formats if necessary
+             pass
+     return outputs
+
+ async def process_transform_json(task_inputs, response_type, task):
+     outputs = []
+     task_id = task.get('id')
+     for input_name, input_info in task_inputs.items():
+         data_format = input_info.get('format', 'list')
+         transform_function_name = input_info.get('transform_function_name', f"{task_id}_transform_input_json")
+         items = input_info.get('data')
+         if data_format == 'list':
+             if items and len(items) > 0:
+                 # Generate the transformation function
+                 # if GLOBAL_GENERATED_PYTHON_CODE.get(transform_function_name, ''):
+                 #     transformation_function = GLOBAL_GENERATED_PYTHON_CODE[transform_function_name]
+                 # else:
+                 #     function_name = await transform_json_with_type(
+                 #         items[0],
+                 #         response_type,
+                 #         transform_function_name
+                 #     )
+                 #     transformation_function = GLOBAL_GENERATED_PYTHON_CODE[function_name]
+                 transformation_function = lambda x: x
+                 for item in items:
+                     input_json = json.loads(item)
+                     output_json = transformation_function(input_json)
+                     output_deserialized = response_type.parse_obj(output_json)
+                     outputs.append(json.dumps(output_deserialized.dict()))
+             if outputs:
+                 interim_return_val = {
+                     "data": outputs,
+                     "format": "list"
+                 }
+                 await store_task_outputs_interim_checkpoint(task, interim_return_val, task_inputs)
+         else:
+             # Handle other formats if necessary
+             pass
+     return outputs
+
+ async def process_function_call(operation, task_inputs, outputs):
+     function_name = operation.get('function', '')
+     args = operation.get('args', [])
+     function = GLOBAL_TOOLS_FUNCTIONS.get(function_name)
+     if function is None:
+         raise Exception(f"Function {function_name} not found.")
+
+     for key, value in task_inputs.items():
+         format = value.get('format', 'list')
+         items = value.get('data')
+         item_parse_args_with_llm = operation.get('args_llm_parsed', 'False')
+         if format == 'list':
+             for item in items:
+                 # Prepare function keyword arguments
+                 if item_parse_args_with_llm == 'True':
+                     function_kwargs, status = await get_function_call_arguments(
+                         item, function_name
+                     )
+                     if status == 'FAIL':
+                         continue
+                 else:
+                     function_kwargs = {arg: item.get(arg, '') for arg in args}
+                 if asyncio.iscoroutinefunction(function):
+                     output = await function(**function_kwargs)
+                 else:
+                     output = function(**function_kwargs)
+                 process_output(output, outputs)
+         else:
+             # Prepare function arguments
+             function_kwargs = {
+                 arg: task_inputs.get(arg, {}).get("data", '') for arg in args
+             }
+             if asyncio.iscoroutinefunction(function):
+                 output = await function(**function_kwargs)
+             else:
+                 output = function(**function_kwargs)
+             process_output(output, outputs)
+     return outputs
+
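An illustrative 'python_callable' operation spec; the keys mirror the lookups above, and score_lead is a hypothetical entry in GLOBAL_TOOLS_FUNCTIONS:

    operation = {
        "type": "python_callable",
        "function": "score_lead",        # hypothetical registered function
        "args": ["email", "company"],    # pulled from each input item
        "args_llm_parsed": "False",      # note: string flag, not a boolean
    }
    # For each item, process_function_call builds kwargs like
    # {"email": item.get("email", ""), "company": item.get("company", "")}
    # and awaits the function if it is a coroutine.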
+ def process_output(output, outputs):
+     if isinstance(output, list):
+         for item in output:
+             if isinstance(item, dict):
+                 outputs.append(json.dumps(item))
+             else:
+                 outputs.append(item)
+     else:
+         outputs.append(output)
+
+ async def run_task_operation(client, assistant, thread, task, task_inputs):
+     """
+     Execute the operation defined in the task.
+     """
+     operation = task.get('operation', {})
+     operation_type = operation.get('type', '')
+     allowed_tools = operation.get('allowed_tools', [])
+     response_type_name = operation.get('response_type', 'GenericList')
+     response_type = lookup_response_type(response_type_name)
+     outputs = []
+
+     if operation_type == 'ai_assistant_call':
+         prompt_template = operation.get('prompt', '')
+         outputs = await process_using_ai_assistant(
+             prompt_template, task_inputs, client, assistant, thread, response_type, allowed_tools, task
+         )
+     elif operation_type == 'ai_transform_input_json':
+         outputs = await process_transform_json(
+             task_inputs, response_type, task
+         )
+     elif operation_type == 'python_callable':
+         outputs = await process_function_call(operation, task_inputs, outputs)
+     return {
+         "data": outputs,
+         "format": "list"
+     }
+
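Putting the pieces together, a minimal task sketch for the 'ai_assistant_call' branch; response_type must name a model registered in GLOBAL_DATA_MODELS, otherwise GenericList is used, and inputs/outputs follow the shapes shown around prepare_task_inputs and store_task_outputs:

    task = {
        "id": "summarize_leads",  # hypothetical task id
        "operation": {
            "type": "ai_assistant_call",
            "prompt": "Summarize: {{ inputs.seed_rows }}",
            "allowed_tools": [],
            "response_type": "GenericList",
        },
        "inputs": {},   # see prepare_task_inputs above
        "outputs": {},  # see store_task_outputs below
    }
    # run_task_operation always wraps results as {"data": outputs, "format": "list"}.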
+ async def store_task_outputs_interim_checkpoint(task, output, task_outputs):
+     """
+     Store the outputs of a task for use in subsequent tasks.
+     """
+     outputs = task.get('outputs', {})
+     if outputs:
+         for output_name, output_spec in outputs.items():
+             destination = output_spec.get('destination', {})
+             if destination:
+                 dest_type = destination.get('type')
+                 path_template = destination.get('path_template')
+                 if path_template:
+                     current_timestamp = '_interim'
+                     path = path_template.replace('{timestamp}', current_timestamp)
+                     path = path.replace('{task_id}', task['id'])
+                     local_path = path
+
+                     if dest_type == 'google_drive':
+                         local_path = os.path.join('/tmp', task['id'], path)
+
+                     if dest_type == 'google_drive' or dest_type == 'local_path':
+                         directory = os.path.dirname(local_path)
+                         if directory and not os.path.exists(directory):
+                             os.makedirs(directory)
+                         logging.info(f"Writing output to {local_path}\n")
+
+                         if output.get("format", "") == 'list':
+                             data_list = []
+                             for item in output.get("data", []):
+                                 try:
+                                     data_list.append(json.loads(item))
+                                 except json.JSONDecodeError:
+                                     # Handle or log the invalid JSON item
+                                     pass
+
+                             # Write the full list first with a 'full_list' prefix
+                             def get_prefixed_path(file_path, prefix):
+                                 directory, filename = os.path.split(file_path)
+                                 name, ext = os.path.splitext(filename)
+                                 prefixed_filename = f"{prefix}_{name}{ext}"
+                                 return os.path.join(directory, prefixed_filename)
+
+                             full_list_local_path = get_prefixed_path(local_path, 'full_list')
+                             full_list_directory = os.path.dirname(full_list_local_path)
+                             if full_list_directory and not os.path.exists(full_list_directory):
+                                 os.makedirs(full_list_directory)
+                             logging.info(f"Writing full list output to {full_list_local_path}\n")
+
+                             with open(full_list_local_path, 'w') as full_file:
+                                 if data_list:
+                                     headers = list(data_list[0].keys())
+                                     writer = csv.DictWriter(full_file, fieldnames=headers)
+                                     writer.writeheader()
+                                     for data in data_list:
+                                         filtered_data = {key: value for key, value in data.items() if key in headers}
+                                         writer.writerow(filtered_data)
+                                 else:
+                                     writer = csv.DictWriter(full_file, fieldnames=[])
+                                     writer.writeheader()
+     return task_outputs
+
+ def filter_data_list(data_list, filter_by):
+     """
+     Filter the data_list based on conditions specified in filter_by.
+     Supported operators: 'gt', 'lt', 'eq', 'gte', 'lte', 'ne'
+     """
+     from operator import gt, lt, eq, ge, le, ne
+
+     operator_map = {
+         'gt': gt,
+         'lt': lt,
+         'eq': eq,
+         'gte': ge,
+         'lte': le,
+         'ne': ne
+     }
+
+     filtered_list = []
+     for item in data_list:
+         include_item = True
+         for property_name, conditions in filter_by.items():
+             value = item.get(property_name)
+             if value is None or (isinstance(value, str) and value.strip() == ""):
+                 include_item = False
+                 break
+             for op, compare_value in conditions.items():
+                 op_func = operator_map.get(op)
+                 if op_func is None:
+                     continue  # Unsupported operator
+                 try:
+                     # Convert values to float for comparison if possible
+                     item_value = float(value)
+                     compare_value = float(compare_value)
+                 except (ValueError, TypeError):
+                     item_value = value
+                 if not op_func(item_value, compare_value):
+                     include_item = False
+                     break
+             if not include_item:
+                 break
+         if include_item:
+             filtered_list.append(item)
+     return filtered_list
+
+ def convert_value(value):
+     """
+     Convert the value to the appropriate type for sorting.
+     """
+     if value is None or (isinstance(value, str) and value.strip() == ""):
+         return ""
+     try:
+         return float(value)
+     except ValueError:
+         pass
+     try:
+         return datetime.fromisoformat(value)
+     except ValueError:
+         pass
+     return str(value)
+
+ def filter_and_sort(data_list, output_spec):
+     """
+     Filter and sort the data_list based on the output_spec.
+     """
+     required_properties = output_spec.get('required_properties', [])
+     if required_properties:
+         data_list = remove_empty_property_rows(data_list, required_properties)
+
+     dedup_by = output_spec.get('deduplication_properties', [])
+     if dedup_by:
+         data_list = deduplicate_list_by_properties(data_list, dedup_by)
+
+     sort_by_asc = output_spec.get('sort_by_asc', [])
+     sort_by_desc = output_spec.get('sort_by_desc', [])
+
+     # Combine sort fields with their corresponding order
+     sort_fields = [(key, True) for key in sort_by_asc] + [(key, False) for key in sort_by_desc]
+     logging.info(f"Sorting by: {sort_fields}")
+
+     # Sort from least significant to most significant key
+     for key, ascending in reversed(sort_fields):
+         data_list = sorted(
+             data_list,
+             key=lambda x: convert_value(x.get(key)),
+             reverse=not ascending
+         )
+
+     filter_by = output_spec.get('filter_by', {})
+     if filter_by:
+         data_list = filter_data_list(data_list, filter_by)
+
+     return data_list
+
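An illustrative output_spec combining the hooks above; note the pipeline order is fixed: required-property filtering, then deduplication, then sorting, then filter_by:

    output_spec = {
        "required_properties": ["email"],
        "deduplication_properties": ["email"],
        "sort_by_desc": ["score"],
        "sort_by_asc": ["company"],
        "filter_by": {"score": {"gte": 0.5}},
    }
    rows = [
        {"email": "a@x.com", "company": "Acme", "score": "0.9"},
        {"email": "a@x.com", "company": "Acme", "score": "0.9"},  # duplicate, dropped
        {"email": "b@x.com", "company": "Beta", "score": "0.2"},  # below gte 0.5, dropped
    ]
    # filter_and_sort(rows, output_spec) -> [{"email": "a@x.com", ...}]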
+ # Store the output of a task run.
+ async def store_task_outputs(task, output, task_outputs):
+     """
+     Store the outputs of a task for use in subsequent tasks.
+     """
+     outputs = task.get('outputs', {})
+     if outputs:
+         for output_name, output_spec in outputs.items():
+             # Store output in task_outputs using task id and output_name
+             if task['id'] not in task_outputs:
+                 task_outputs[task['id']] = {}
+
+             destination = output_spec.get('destination', {})
+             if destination:
+                 dest_type = destination.get('type')
+                 path_template = destination.get('path_template')
+                 if path_template:
+                     current_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
+                     path = path_template.replace('{timestamp}', current_timestamp)
+                     path = path.replace('{task_id}', task['id'])
+                     local_path = path
+
+                     if dest_type == 'google_drive':
+                         local_path = os.path.join('/tmp', task['id'], path)
+
+                     if dest_type == 'google_drive' or dest_type == 'local_path':
+                         directory = os.path.dirname(local_path)
+                         if directory and not os.path.exists(directory):
+                             os.makedirs(directory)
+                         logging.info(f"Writing output to {local_path}\n")
+
+                         if output.get("format", "") == 'list':
+                             data_list = []
+                             for item in output.get("data", []):
+                                 try:
+                                     data_list.append(json.loads(item))
+                                 except json.JSONDecodeError:
+                                     # Handle or log the invalid JSON item
+                                     pass
+                             logging.info(f"Total count: {len(data_list)}")
+
+                             # Write the full list first with a 'full_list' prefix
+                             def get_prefixed_path(file_path, prefix):
+                                 directory, filename = os.path.split(file_path)
+                                 name, ext = os.path.splitext(filename)
+                                 prefixed_filename = f"{prefix}_{name}{ext}"
+                                 return os.path.join(directory, prefixed_filename)
+
+                             full_list_local_path = get_prefixed_path(local_path, 'full_list')
+                             full_list_directory = os.path.dirname(full_list_local_path)
+                             if full_list_directory and not os.path.exists(full_list_directory):
+                                 os.makedirs(full_list_directory)
+                             logging.info(f"Writing full list output to {full_list_local_path}\n")
+
+                             with open(full_list_local_path, 'w') as full_file:
+                                 if data_list:
+                                     headers = list(data_list[0].keys())
+                                     writer = csv.DictWriter(full_file, fieldnames=headers)
+                                     writer.writeheader()
+                                     for data in data_list:
+                                         filtered_data = {key: value for key, value in data.items() if key in headers}
+                                         writer.writerow(filtered_data)
+                                 else:
+                                     writer = csv.DictWriter(full_file, fieldnames=[])
+                                     writer.writeheader()
+
+                             if data_list:
+                                 data_list = filter_and_sort(data_list, output_spec)
+                                 if data_list:
+                                     logging.info(f"Deduped and removed count: {len(data_list)}")
+                                     headers = [key for key, value in data_list[0].items() if isinstance(value, (str, int, float, bool))]
+                                     with open(local_path, 'w') as file:
+                                         writer = csv.DictWriter(file, fieldnames=headers)
+                                         writer.writeheader()
+                                         for data in data_list:
+                                             filtered_data = {key: value for key, value in data.items() if key in headers}
+                                             writer.writerow(filtered_data)
+                                 else:
+                                     with open(local_path, 'w') as file:
+                                         writer = csv.DictWriter(file, fieldnames=[])
+                                         writer.writeheader()
+                             else:
+                                 with open(local_path, 'w') as file:
+                                     writer = csv.DictWriter(file, fieldnames=[])
+                                     writer.writeheader()
+                         else:
+                             with open(local_path, 'w') as file:
+                                 file.write(str(output))
+                     else:
+                         pass
+                     if dest_type == 'google_drive':
+                         await write_to_google_drive(path, local_path)
+
+             task_outputs[task['id']][output_name] = output
+     else:
+         task_outputs[task['id']] = output
+
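A sketch of the 'outputs' destination shape consumed above; {timestamp} and {task_id} are literal tokens replaced at write time, and google_drive destinations are staged under /tmp before upload:

    task = {
        "id": "summarize_leads",  # hypothetical task id
        "outputs": {
            "out_list": {
                "destination": {
                    "type": "local_path",  # or "google_drive"
                    "path_template": "runs/{task_id}_{timestamp}.csv",
                },
                "required_properties": ["email"],
                "deduplication_properties": ["email"],
            }
        },
    }
    # A full_list_-prefixed CSV is written first, then the filtered/sorted CSV.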
+ # Remove rows with None or empty values for the specified properties.
+ def remove_empty_property_rows(data_list, properties):
+     """
+     Remove rows with None or empty values for the specified properties.
+     """
+     filtered_list = []
+     for item in data_list:
+         empty = False
+         for property_name in properties:
+             value = item.get(property_name)
+             if value is None or (isinstance(value, str) and value.strip() == ""):
+                 empty = True
+                 break
+         if not empty:
+             filtered_list.append(item)
+     return filtered_list
+
+ # Deduplicate list by given input properties.
+ def deduplicate_list_by_properties(data_list, properties):
+     """
+     Deduplicate a list of dictionaries by a list of properties in order.
+     Only deduplicate if the property value is not None or empty, strip spaces, and compare in lowercase.
+     """
+     for property_name in properties:
+         seen = set()
+         deduplicated_list = []
+         for item in data_list:
+             value = item.get(property_name)
+             value = str(value or "").strip().lower()
+             if value == "":
+                 deduplicated_list.append(item)
+             elif value not in seen:
+                 seen.add(value)
+                 deduplicated_list.append(item)
+         data_list = deduplicated_list
+     return data_list
+
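The comparison is whitespace-stripped and case-insensitive, and rows with an empty value for a property are always kept; for example:

    rows = [{"email": "A@x.com"}, {"email": " a@x.com "}, {"email": ""}]
    # deduplicate_list_by_properties(rows, ["email"]) keeps the first row and the
    # empty-email row; " a@x.com " normalizes to "a@x.com" and is dropped.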
+ async def write_to_google_drive(cloud_path, local_path):
+     # Upload the local file to Google Drive at the given cloud path.
+     await write_content_to_googledrive(cloud_path, local_path)
+     logging.info(f"Writing to Google Drive at {cloud_path} {local_path}")
+
+
+ # Get a dynamic pydantic model that corresponds to the function signature
+ def get_dynamic_model(function_name: str, function: Callable) -> Type[BaseModel]:
+     """
+     Dynamically creates a Pydantic BaseModel subclass based on the parameters of the given function.
+
+     Args:
+         function_name (str): The name of the function.
+         function (Callable): The function object.
+
+     Returns:
+         Type[BaseModel]: A dynamically created Pydantic model class.
+     """
+     # Retrieve the function's signature
+     signature = inspect.signature(function)
+     fields = {}
+
+     for param_name, param in signature.parameters.items():
+         # Extract the parameter's type annotation
+         annotation = param.annotation if param.annotation is not inspect.Parameter.empty else Any
+
+         # Determine if the parameter has a default value
+         if param.default is not inspect.Parameter.empty:
+             default_value = param.default
+         else:
+             default_value = ...
+
+         # Create a Field with a description
+         field_info = Field(
+             default=default_value,
+             description=f"Parameter '{param_name}' of type '{getattr(annotation, '__name__', str(annotation))}'"
+         )
+
+         # Add the field to the fields dictionary
+         fields[param_name] = (annotation, field_info)
+
+     # Create and return the dynamic model
+     dynamic_model = create_model(
+         f"{function_name}_Arguments",
+         __base__=BaseModel,
+         **fields
+     )
+
+     return dynamic_model
+
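A sketch of the signature-to-model translation; send_email is a hypothetical function used only for illustration:

    def send_email(to: str, subject: str, body: str = "") -> None:  # hypothetical
        ...

    args_model = get_dynamic_model("send_email", send_email)
    # Equivalent, roughly, to:
    # class send_email_Arguments(BaseModel):
    #     to: str       # required (default is ...)
    #     subject: str  # required
    #     body: str = ""
    # pydantic_function_tool(args_model) then exposes it as an OpenAI tool schema.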
+ # Given a function definition and input string, extract the function call arguments using the OpenAI API
+ async def get_function_call_arguments(input_text: str, function_name: str) -> Tuple[Dict[str, Any], str]:
+     """
+     Extracts function call arguments from the input text using OpenAI's API.
+
+     Args:
+         input_text (str): The input text containing the function call.
+         function_name (str): The name of the function to extract arguments for.
+
+     Returns:
+         Tuple[Dict[str, Any], str]: A tuple containing the function arguments as a dictionary and a status message.
+     """
+     try:
+         # Retrieve the function and its parameters
+         function, _ = get_function(function_name)
+
+         # Generate a dynamic Pydantic model based on the function's parameters
+         dynamic_model = get_dynamic_model(function_name, function)
+
+         # Define the tool using the dynamic model
+         tool = pydantic_function_tool(dynamic_model)
+
+         # Construct the prompt
+         prompt = f"Extract the arguments for the function '{function_name}' from the following input:\n\n{input_text}"
+
+         # Initialize the OpenAI client
+         client = AsyncOpenAI()
+
+         # Make the API call
+         response = await client.beta.chat.completions.parse(
+             model="gpt-5.1-chat",
+             messages=[
+                 {"role": "system", "content": "Extract function arguments in JSON format."},
+                 {"role": "user", "content": prompt},
+             ],
+             tools=[tool],
+             response_format=dynamic_model
+         )
+
+         # Extract the function call arguments from the response
+         parsed_output = vars(response.choices[0].message.parsed)
+         return parsed_output, 'SUCCESS'
+
+     except OpenAIError as e:
+         logging.error(f"OpenAI API error: {e}")
+         raise HTTPException(status_code=502, detail="Error communicating with the OpenAI API.")
+     except Exception as e:
+         logging.error(f"Unexpected error: {e}")
+         raise HTTPException(status_code=500, detail="An unexpected error occurred while processing your request.")