ragaai-catalyst 2.1.5b0__tar.gz → 2.1.5b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {ragaai_catalyst-2.1.5b0/ragaai_catalyst.egg-info → ragaai_catalyst-2.1.5b2}/PKG-INFO +1 -1
  2. ragaai_catalyst-2.1.5b2/examples/sync_sample_call.py +57 -0
  3. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/pyproject.toml +1 -1
  4. ragaai_catalyst-2.1.5b2/ragaai_catalyst/dataset.py +603 -0
  5. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +20 -4
  6. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +36 -113
  7. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +17 -2
  8. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +52 -1
  9. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +16 -19
  10. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +37 -3
  11. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +17 -1
  12. ragaai_catalyst-2.1.5b2/ragaai_catalyst/tracers/agentic_tracing/utils/system_monitor.py +215 -0
  13. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/distributed.py +46 -19
  14. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/tracer.py +2 -2
  15. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2/ragaai_catalyst.egg-info}/PKG-INFO +1 -1
  16. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst.egg-info/SOURCES.txt +2 -0
  17. ragaai_catalyst-2.1.5b0/ragaai_catalyst/dataset.py +0 -273
  18. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  19. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  20. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  21. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/.gitignore +0 -0
  22. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/LICENSE +0 -0
  23. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/README.md +0 -0
  24. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/docs/dataset_management.md +0 -0
  25. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/docs/prompt_management.md +0 -0
  26. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/FinancialAnalysisSystem.ipynb +0 -0
  27. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/TravelPlanner.ipynb +0 -0
  28. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/custom_tracer_example.py +0 -0
  29. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/finance.py +0 -0
  30. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/prompt_management_litellm.ipynb +0 -0
  31. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/prompt_management_openai.ipynb +0 -0
  32. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/travel_agent/agents.py +0 -0
  33. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/travel_agent/config.py +0 -0
  34. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/travel_agent/main.py +0 -0
  35. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/examples/travel_agent/tools.py +0 -0
  36. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/__init__.py +0 -0
  37. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/_version.py +0 -0
  38. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/evaluation.py +0 -0
  39. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/experiment.py +0 -0
  40. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/guard_executor.py +0 -0
  41. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/guardrails_manager.py +0 -0
  42. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/internal_api_completion.py +0 -0
  43. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/prompt_manager.py +0 -0
  44. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/proxy_call.py +0 -0
  45. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/ragaai_catalyst.py +0 -0
  46. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/synthetic_data_generation.py +0 -0
  47. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/__init__.py +0 -0
  48. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/README.md +0 -0
  49. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/__init__.py +0 -0
  50. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/data/__init__.py +0 -0
  51. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +0 -0
  52. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tests/FinancialAnalysisSystem.ipynb +0 -0
  53. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tests/GameActivityEventPlanner.ipynb +0 -0
  54. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tests/TravelPlanner.ipynb +0 -0
  55. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tests/__init__.py +0 -0
  56. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tests/ai_travel_agent.py +0 -0
  57. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tests/unique_decorator_test.py +0 -0
  58. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/__init__.py +0 -0
  59. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
  60. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +0 -0
  61. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +0 -0
  62. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/upload/__init__.py +0 -0
  63. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +0 -0
  64. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +0 -0
  65. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +0 -0
  66. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +0 -0
  67. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +0 -0
  68. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +0 -0
  69. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +0 -0
  70. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +0 -0
  71. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +0 -0
  72. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -0
  73. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +0 -0
  74. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -0
  75. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +0 -0
  76. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +0 -0
  77. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/exporters/__init__.py +0 -0
  78. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -0
  79. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -0
  80. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/instrumentators/__init__.py +0 -0
  81. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/instrumentators/langchain.py +0 -0
  82. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/instrumentators/llamaindex.py +0 -0
  83. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/instrumentators/openai.py +0 -0
  84. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/llamaindex_callback.py +0 -0
  85. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/upload_traces.py +0 -0
  86. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/utils/__init__.py +0 -0
  87. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/tracers/utils/utils.py +0 -0
  88. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst/utils.py +0 -0
  89. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst.egg-info/dependency_links.txt +0 -0
  90. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst.egg-info/requires.txt +0 -0
  91. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/ragaai_catalyst.egg-info/top_level.txt +0 -0
  92. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/requirements.txt +0 -0
  93. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/setup.cfg +0 -0
  94. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/test/test_catalyst/test_configuration.py +0 -0
  95. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/test/test_catalyst/test_dataset.py +0 -0
  96. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/test/test_catalyst/test_evaluation.py +0 -0
  97. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/test/test_catalyst/test_prompt_manager.py +0 -0
  98. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b2}/test/test_catalyst/test_synthetic_data_generation.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ragaai_catalyst
3
- Version: 2.1.5b0
3
+ Version: 2.1.5b2
4
4
  Summary: RAGA AI CATALYST
5
5
  Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
6
6
  Requires-Python: <3.13,>=3.9
@@ -0,0 +1,57 @@
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
+ from litellm import completion
6
+ import openai
7
+ from openai import OpenAI
8
+ from ragaai_catalyst.tracers import Tracer
9
+ from ragaai_catalyst import RagaAICatalyst
10
+
11
+ catalyst = RagaAICatalyst(
12
+ access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
13
+ secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
14
+ base_url=os.getenv("RAGAAI_CATALYST_BASE_URL"),
15
+ )
16
+ # Initialize tracer
17
+ tracer = Tracer(
18
+ project_name="alteryx_copilot-tan",
19
+ dataset_name="testing-1",
20
+ tracer_type="Agentic",
21
+ auto_instrumentation=
22
+ {
23
+ "user_interaction": False,
24
+ "file_io": True
25
+ }
26
+ )
27
+
28
+ tracer.start()
29
+ @tracer.trace_tool(name="llm_call")
30
+ def llm_call(prompt, max_tokens=512, model="gpt-3.5-turbo"):
31
+ client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
32
+ print(f"Prompt: {prompt}")
33
+ print(f"Max Tokens: {max_tokens}")
34
+ print(f"Model: {model}")
35
+ input("Press Enter to continue...")
36
+ with open("response.txt", "w") as f:
37
+ f.write("test")
38
+ try:
39
+ response = client.chat.completions.create(
40
+ model=model,
41
+ messages=[{"role": "user", "content": prompt}],
42
+ max_tokens=max_tokens,
43
+ temperature=0.7,
44
+ )
45
+ return response.choices[0].message.content.strip()
46
+ except Exception as e:
47
+ print(f"Error in llm_call: {str(e)}")
48
+ raise
49
+
50
+
51
+ def main():
52
+ response = llm_call("how are you?")
53
+ print(f"Response: {response}")
54
+
55
+ if __name__ == "__main__":
56
+ main()
57
+ tracer.stop()
@@ -9,7 +9,7 @@ readme = "README.md"
9
9
  requires-python = ">=3.9,<3.13"
10
10
  # license = {file = "LICENSE"}
11
11
 
12
- version = "2.1.5.beta.0"
12
+ version = "2.1.5.beta.2"
13
13
  authors = [
14
14
  {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
15
15
  {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
@@ -0,0 +1,603 @@
1
+ import os
2
+ import json
3
+ import requests
4
+ from .utils import response_checker
5
+ from typing import Union
6
+ import logging
7
+ from .ragaai_catalyst import RagaAICatalyst
8
+ import pandas as pd
9
+ logger = logging.getLogger(__name__)
10
+ get_token = RagaAICatalyst.get_token
11
+
12
+
13
+ class Dataset:
14
+ BASE_URL = None
15
+ TIMEOUT = 30
16
+
17
+ def __init__(self, project_name):
18
+ self.project_name = project_name
19
+ self.num_projects = 99999
20
+ Dataset.BASE_URL = RagaAICatalyst.BASE_URL
21
+ headers = {
22
+ "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
23
+ }
24
+ try:
25
+ response = requests.get(
26
+ f"{Dataset.BASE_URL}/v2/llm/projects?size={self.num_projects}",
27
+ headers=headers,
28
+ timeout=self.TIMEOUT,
29
+ )
30
+ response.raise_for_status()
31
+ logger.debug("Projects list retrieved successfully")
32
+
33
+ project_list = [
34
+ project["name"] for project in response.json()["data"]["content"]
35
+ ]
36
+
37
+ if project_name not in project_list:
38
+ raise ValueError("Project not found. Please enter a valid project name")
39
+
40
+ self.project_id = [
41
+ project["id"] for project in response.json()["data"]["content"] if project["name"] == project_name
42
+ ][0]
43
+
44
+ except requests.exceptions.RequestException as e:
45
+ logger.error(f"Failed to retrieve projects list: {e}")
46
+ raise
47
+
48
+ def list_datasets(self):
49
+ """
50
+ Retrieves a list of datasets for a given project.
51
+
52
+ Returns:
53
+ list: A list of dataset names.
54
+
55
+ Raises:
56
+ None.
57
+ """
58
+
59
+ def make_request():
60
+ headers = {
61
+ 'Content-Type': 'application/json',
62
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
63
+ "X-Project-Id": str(self.project_id),
64
+ }
65
+ json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
66
+ try:
67
+ response = requests.post(
68
+ f"{Dataset.BASE_URL}/v2/llm/dataset",
69
+ headers=headers,
70
+ json=json_data,
71
+ timeout=Dataset.TIMEOUT,
72
+ )
73
+ response.raise_for_status()
74
+ return response
75
+ except requests.exceptions.RequestException as e:
76
+ logger.error(f"Failed to list datasets: {e}")
77
+ raise
78
+
79
+ try:
80
+ response = make_request()
81
+ response_checker(response, "Dataset.list_datasets")
82
+ if response.status_code == 401:
83
+ get_token() # Fetch a new token and set it in the environment
84
+ response = make_request() # Retry the request
85
+ if response.status_code != 200:
86
+ return {
87
+ "status_code": response.status_code,
88
+ "message": response.json(),
89
+ }
90
+ datasets = response.json()["data"]["content"]
91
+ dataset_list = [dataset["name"] for dataset in datasets]
92
+ return dataset_list
93
+ except Exception as e:
94
+ logger.error(f"Error in list_datasets: {e}")
95
+ raise
96
+
97
+ def get_schema_mapping(self):
98
+ headers = {
99
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
100
+ "X-Project-Name": self.project_name,
101
+ }
102
+ try:
103
+ response = requests.get(
104
+ f"{Dataset.BASE_URL}/v1/llm/schema-elements",
105
+ headers=headers,
106
+ timeout=Dataset.TIMEOUT,
107
+ )
108
+ response.raise_for_status()
109
+ response_data = response.json()["data"]["schemaElements"]
110
+ if not response.json()['success']:
111
+ raise ValueError('Unable to fetch Schema Elements for the CSV')
112
+ return response_data
113
+ except requests.exceptions.RequestException as e:
114
+ logger.error(f"Failed to get CSV schema: {e}")
115
+ raise
116
+
117
+ ###################### CSV Upload APIs ###################
118
+
119
+ def get_dataset_columns(self, dataset_name):
120
+ list_dataset = self.list_datasets()
121
+ if dataset_name not in list_dataset:
122
+ raise ValueError(f"Dataset {dataset_name} does not exists. Please enter a valid dataset name")
123
+
124
+ headers = {
125
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
126
+ "X-Project-Name": self.project_name,
127
+ }
128
+ headers = {
129
+ 'Content-Type': 'application/json',
130
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
131
+ "X-Project-Id": str(self.project_id),
132
+ }
133
+ json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
134
+ try:
135
+ response = requests.post(
136
+ f"{Dataset.BASE_URL}/v2/llm/dataset",
137
+ headers=headers,
138
+ json=json_data,
139
+ timeout=Dataset.TIMEOUT,
140
+ )
141
+ response.raise_for_status()
142
+ datasets = response.json()["data"]["content"]
143
+ dataset_id = [dataset["id"] for dataset in datasets if dataset["name"]==dataset_name][0]
144
+ except requests.exceptions.RequestException as e:
145
+ logger.error(f"Failed to list datasets: {e}")
146
+ raise
147
+
148
+ try:
149
+ response = requests.get(
150
+ f"{Dataset.BASE_URL}/v2/llm/dataset/{dataset_id}?initialCols=0",
151
+ headers=headers,
152
+ timeout=Dataset.TIMEOUT,
153
+ )
154
+ response.raise_for_status()
155
+ dataset_columns = response.json()["data"]["datasetColumnsResponses"]
156
+ dataset_columns = [item["displayName"] for item in dataset_columns]
157
+ dataset_columns = [data for data in dataset_columns if not data.startswith('_')]
158
+ if not response.json()['success']:
159
+ raise ValueError('Unable to fetch details of for the CSV')
160
+ return dataset_columns
161
+ except requests.exceptions.RequestException as e:
162
+ logger.error(f"Failed to get CSV columns: {e}")
163
+ raise
164
+
165
+ def create_from_csv(self, csv_path, dataset_name, schema_mapping):
166
+ list_dataset = self.list_datasets()
167
+ if dataset_name in list_dataset:
168
+ raise ValueError(f"Dataset name {dataset_name} already exists. Please enter a unique dataset name")
169
+
170
+ #### get presigned URL
171
+ def get_presignedUrl():
172
+ headers = {
173
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
174
+ "X-Project-Id": str(self.project_id),
175
+ }
176
+ try:
177
+ response = requests.get(
178
+ f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
179
+ headers=headers,
180
+ timeout=Dataset.TIMEOUT,
181
+ )
182
+ response.raise_for_status()
183
+ return response.json()
184
+ except requests.exceptions.RequestException as e:
185
+ logger.error(f"Failed to get presigned URL: {e}")
186
+ raise
187
+
188
+ try:
189
+ presignedUrl = get_presignedUrl()
190
+ if presignedUrl['success']:
191
+ url = presignedUrl['data']['presignedUrl']
192
+ filename = presignedUrl['data']['fileName']
193
+ else:
194
+ raise ValueError('Unable to fetch presignedUrl')
195
+ except Exception as e:
196
+ logger.error(f"Error in get_presignedUrl: {e}")
197
+ raise
198
+
199
+ #### put csv to presigned URL
200
+ def put_csv_to_presignedUrl(url):
201
+ headers = {
202
+ 'Content-Type': 'text/csv',
203
+ 'x-ms-blob-type': 'BlockBlob',
204
+ }
205
+ try:
206
+ with open(csv_path, 'rb') as file:
207
+ response = requests.put(
208
+ url,
209
+ headers=headers,
210
+ data=file,
211
+ timeout=Dataset.TIMEOUT,
212
+ )
213
+ response.raise_for_status()
214
+ return response
215
+ except requests.exceptions.RequestException as e:
216
+ logger.error(f"Failed to put CSV to presigned URL: {e}")
217
+ raise
218
+
219
+ try:
220
+
221
+ put_csv_response = put_csv_to_presignedUrl(url)
222
+ print(put_csv_response)
223
+ if put_csv_response.status_code not in (200, 201):
224
+ raise ValueError('Unable to put csv to the presignedUrl')
225
+ except Exception as e:
226
+ logger.error(f"Error in put_csv_to_presignedUrl: {e}")
227
+ raise
228
+
229
+ ## Upload csv to elastic
230
+ def upload_csv_to_elastic(data):
231
+ header = {
232
+ 'Content-Type': 'application/json',
233
+ 'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
234
+ "X-Project-Id": str(self.project_id)
235
+ }
236
+ try:
237
+ response = requests.post(
238
+ f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
239
+ headers=header,
240
+ json=data,
241
+ timeout=Dataset.TIMEOUT,
242
+ )
243
+ if response.status_code==400:
244
+ raise ValueError(response.json()["message"])
245
+ response.raise_for_status()
246
+ return response.json()
247
+ except requests.exceptions.RequestException as e:
248
+ logger.error(f"Failed to upload CSV to elastic: {e}")
249
+ raise
250
+
251
+ def generate_schema(mapping):
252
+ result = {}
253
+ for column, schema_element in mapping.items():
254
+ result[column] = {"columnType": schema_element}
255
+ return result
256
+
257
+ try:
258
+ schema_mapping = generate_schema(schema_mapping)
259
+ data = {
260
+ "projectId": str(self.project_id),
261
+ "datasetName": dataset_name,
262
+ "fileName": filename,
263
+ "schemaMapping": schema_mapping,
264
+ "opType": "insert",
265
+ "description": ""
266
+ }
267
+ upload_csv_response = upload_csv_to_elastic(data)
268
+ if not upload_csv_response['success']:
269
+ raise ValueError('Unable to upload csv')
270
+ else:
271
+ print(upload_csv_response['message'])
272
+ except Exception as e:
273
+ logger.error(f"Error in create_from_csv: {e}")
274
+ raise
275
+
276
+ def add_rows(self, csv_path, dataset_name):
277
+ """
278
+ Add rows to an existing dataset from a CSV file.
279
+
280
+ Args:
281
+ csv_path (str): Path to the CSV file to be added
282
+ dataset_name (str): Name of the existing dataset to add rows to
283
+
284
+ Raises:
285
+ ValueError: If dataset does not exist or columns are incompatible
286
+ """
287
+ # Get existing dataset columns
288
+ existing_columns = self.get_dataset_columns(dataset_name)
289
+
290
+ # Read the CSV file to check columns
291
+ try:
292
+ import pandas as pd
293
+ df = pd.read_csv(csv_path)
294
+ csv_columns = df.columns.tolist()
295
+ except Exception as e:
296
+ logger.error(f"Failed to read CSV file: {e}")
297
+ raise ValueError(f"Unable to read CSV file: {e}")
298
+
299
+ # Check column compatibility
300
+ for column in existing_columns:
301
+ if column not in csv_columns:
302
+ df[column] = None
303
+
304
+ # Get presigned URL for the CSV
305
+ def get_presignedUrl():
306
+ headers = {
307
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
308
+ "X-Project-Id": str(self.project_id),
309
+ }
310
+ try:
311
+ response = requests.get(
312
+ f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
313
+ headers=headers,
314
+ timeout=Dataset.TIMEOUT,
315
+ )
316
+ response.raise_for_status()
317
+ return response.json()
318
+ except requests.exceptions.RequestException as e:
319
+ logger.error(f"Failed to get presigned URL: {e}")
320
+ raise
321
+
322
+ try:
323
+ presignedUrl = get_presignedUrl()
324
+ if presignedUrl['success']:
325
+ url = presignedUrl['data']['presignedUrl']
326
+ filename = presignedUrl['data']['fileName']
327
+ else:
328
+ raise ValueError('Unable to fetch presignedUrl')
329
+ except Exception as e:
330
+ logger.error(f"Error in get_presignedUrl: {e}")
331
+ raise
332
+
333
+ # Upload CSV to presigned URL
334
+ def put_csv_to_presignedUrl(url):
335
+ headers = {
336
+ 'Content-Type': 'text/csv',
337
+ 'x-ms-blob-type': 'BlockBlob',
338
+ }
339
+ try:
340
+ with open(csv_path, 'rb') as file:
341
+ response = requests.put(
342
+ url,
343
+ headers=headers,
344
+ data=file,
345
+ timeout=Dataset.TIMEOUT,
346
+ )
347
+ response.raise_for_status()
348
+ return response
349
+ except requests.exceptions.RequestException as e:
350
+ logger.error(f"Failed to put CSV to presigned URL: {e}")
351
+ raise
352
+
353
+ try:
354
+ put_csv_response = put_csv_to_presignedUrl(url)
355
+ if put_csv_response.status_code not in (200, 201):
356
+ raise ValueError('Unable to put csv to the presignedUrl')
357
+ except Exception as e:
358
+ logger.error(f"Error in put_csv_to_presignedUrl: {e}")
359
+ raise
360
+
361
+ # Prepare schema mapping (assuming same mapping as original dataset)
362
+ def generate_schema_mapping(dataset_name):
363
+ headers = {
364
+ 'Content-Type': 'application/json',
365
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
366
+ "X-Project-Id": str(self.project_id),
367
+ }
368
+ json_data = {
369
+ "size": 12,
370
+ "page": "0",
371
+ "projectId": str(self.project_id),
372
+ "search": ""
373
+ }
374
+ try:
375
+ # First get dataset details
376
+ response = requests.post(
377
+ f"{Dataset.BASE_URL}/v2/llm/dataset",
378
+ headers=headers,
379
+ json=json_data,
380
+ timeout=Dataset.TIMEOUT,
381
+ )
382
+ response.raise_for_status()
383
+ datasets = response.json()["data"]["content"]
384
+ dataset_id = [dataset["id"] for dataset in datasets if dataset["name"]==dataset_name][0]
385
+
386
+ # Get dataset details to extract schema mapping
387
+ response = requests.get(
388
+ f"{Dataset.BASE_URL}/v2/llm/dataset/{dataset_id}?initialCols=0",
389
+ headers=headers,
390
+ timeout=Dataset.TIMEOUT,
391
+ )
392
+ response.raise_for_status()
393
+
394
+ # Extract schema mapping
395
+ schema_mapping = {}
396
+ for col in response.json()["data"]["datasetColumnsResponses"]:
397
+ schema_mapping[col["displayName"]] = {"columnType": col["columnType"]}
398
+
399
+ return schema_mapping
400
+ except requests.exceptions.RequestException as e:
401
+ logger.error(f"Failed to get schema mapping: {e}")
402
+ raise
403
+
404
+ # Upload CSV to elastic
405
+ try:
406
+ schema_mapping = generate_schema_mapping(dataset_name)
407
+
408
+ data = {
409
+ "projectId": str(self.project_id),
410
+ "datasetName": dataset_name,
411
+ "fileName": filename,
412
+ "schemaMapping": schema_mapping,
413
+ "opType": "update", # Use update for adding rows
414
+ "description": "Adding new rows to dataset"
415
+ }
416
+
417
+ headers = {
418
+ 'Content-Type': 'application/json',
419
+ 'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
420
+ "X-Project-Id": str(self.project_id)
421
+ }
422
+
423
+ response = requests.post(
424
+ f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
425
+ headers=headers,
426
+ json=data,
427
+ timeout=Dataset.TIMEOUT,
428
+ )
429
+
430
+ if response.status_code == 400:
431
+ raise ValueError(response.json().get("message", "Failed to add rows"))
432
+
433
+ response.raise_for_status()
434
+
435
+ # Check response
436
+ response_data = response.json()
437
+ if response_data.get('success', False):
438
+ print(f"{response_data['message']}")
439
+ else:
440
+ raise ValueError(response_data.get('message', 'Failed to add rows'))
441
+
442
+ except Exception as e:
443
+ logger.error(f"Error in add_rows_to_dataset: {e}")
444
+ raise
445
+
446
+ def add_columns(self, text_fields, dataset_name, column_name, provider, model, variables={}):
447
+ """
448
+ Add a column to a dataset with dynamically fetched model parameters
449
+
450
+ Args:
451
+ project_id (int): Project ID
452
+ dataset_id (int): Dataset ID
453
+ column_name (str): Name of the new column
454
+ provider (str): Name of the model provider
455
+ model (str): Name of the model
456
+ """
457
+ # First, get model parameters
458
+
459
+ # Validate text_fields input
460
+ if not isinstance(text_fields, list):
461
+ raise ValueError("text_fields must be a list of dictionaries")
462
+
463
+ for field in text_fields:
464
+ if not isinstance(field, dict) or 'role' not in field or 'content' not in field:
465
+ raise ValueError("Each text field must be a dictionary with 'role' and 'content' keys")
466
+
467
+ # First, get the dataset ID
468
+ headers = {
469
+ 'Content-Type': 'application/json',
470
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
471
+ "X-Project-Id": str(self.project_id),
472
+ }
473
+ json_data = {"size": 12, "page": "0", "projectId": str(self.project_id), "search": ""}
474
+
475
+ try:
476
+ # Get dataset list
477
+ response = requests.post(
478
+ f"{Dataset.BASE_URL}/v2/llm/dataset",
479
+ headers=headers,
480
+ json=json_data,
481
+ timeout=Dataset.TIMEOUT,
482
+ )
483
+ response.raise_for_status()
484
+ datasets = response.json()["data"]["content"]
485
+
486
+ # Find dataset ID
487
+ dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
488
+
489
+ if dataset_id is None:
490
+ raise ValueError(f"Dataset {dataset_name} not found")
491
+
492
+
493
+
494
+ parameters_url= f"{Dataset.BASE_URL}/playground/providers/models/parameters/list"
495
+
496
+ headers = {
497
+ 'Content-Type': 'application/json',
498
+ "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
499
+ "X-Project-Id": str(self.project_id),
500
+ }
501
+
502
+ # Fetch model parameters
503
+ parameters_payload = {
504
+ "providerName": provider,
505
+ "modelName": model
506
+ }
507
+
508
+ # Get model parameters
509
+ params_response = requests.post(
510
+ parameters_url,
511
+ headers=headers,
512
+ json=parameters_payload,
513
+ timeout=30
514
+ )
515
+ params_response.raise_for_status()
516
+
517
+ # Extract parameters
518
+ all_parameters = params_response.json().get('data', [])
519
+
520
+ # Filter and transform parameters for add-column API
521
+ formatted_parameters = []
522
+ for param in all_parameters:
523
+ value = param.get('value')
524
+ param_type = param.get('type')
525
+
526
+ if value is None:
527
+ formatted_param = {
528
+ "name": param.get('name'),
529
+ "value": None, # Pass None if the value is null
530
+ "type": param.get('type')
531
+ }
532
+ else:
533
+ # Improved type handling
534
+ if param_type == "float":
535
+ value = float(value) # Ensure value is converted to float
536
+ elif param_type == "int":
537
+ value = int(value) # Ensure value is converted to int
538
+ elif param_type == "bool":
539
+ value = bool(value) # Ensure value is converted to bool
540
+ elif param_type == "string":
541
+ value = str(value) # Ensure value is converted to string
542
+ else:
543
+ raise ValueError(f"Unsupported parameter type: {param_type}") # Handle unsupported types
544
+
545
+ formatted_param = {
546
+ "name": param.get('name'),
547
+ "value": value,
548
+ "type": param.get('type')
549
+ }
550
+ formatted_parameters.append(formatted_param)
551
+ dataset_id = next((dataset["id"] for dataset in datasets if dataset["name"] == dataset_name), None)
552
+
553
+ # Prepare payload for add column API
554
+ add_column_payload = {
555
+ "rowFilterList": [],
556
+ "columnName": column_name,
557
+ "datasetId": dataset_id,
558
+ "variables": variables,
559
+ "promptTemplate": {
560
+ "textFields": text_fields,
561
+ "modelSpecs": {
562
+ "model": f"{provider}/{model}",
563
+ "parameters": formatted_parameters
564
+ }
565
+ }
566
+ }
567
+ if variables:
568
+ variable_specs = []
569
+ for key, values in variables.items():
570
+ variable_specs.append({
571
+ "name": key,
572
+ "type": "string",
573
+ "schema": "query"
574
+ })
575
+ add_column_payload["promptTemplate"]["variableSpecs"] = variable_specs
576
+
577
+ # Make API call to add column
578
+ add_column_url = f"{Dataset.BASE_URL}/v2/llm/dataset/add-column"
579
+
580
+ response = requests.post(
581
+ add_column_url,
582
+ headers={
583
+ 'Content-Type': 'application/json',
584
+ 'Authorization': f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
585
+ "X-Project-Id": str(self.project_id)
586
+ },
587
+ json=add_column_payload,
588
+ timeout=30
589
+ )
590
+
591
+ # Check response
592
+ response.raise_for_status()
593
+ response_data = response.json()
594
+
595
+ if response_data.get('success', False):
596
+ print(f"Column '{column_name}' added successfully to dataset '{dataset_name}'")
597
+ else:
598
+ raise ValueError(response_data.get('message', 'Failed to add column'))
599
+
600
+ except requests.exceptions.RequestException as e:
601
+ print(f"Error adding column: {e}")
602
+ raise
603
+