ragaai-catalyst 2.1.5b0__tar.gz → 2.1.5b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {ragaai_catalyst-2.1.5b0/ragaai_catalyst.egg-info → ragaai_catalyst-2.1.5b1}/PKG-INFO +1 -1
  2. ragaai_catalyst-2.1.5b1/examples/sync_sample_call.py +57 -0
  3. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/pyproject.toml +1 -1
  4. ragaai_catalyst-2.1.5b1/ragaai_catalyst/dataset.py +603 -0
  5. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/agent_tracer.py +20 -4
  6. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/base.py +16 -14
  7. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/custom_tracer.py +17 -2
  8. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/llm_tracer.py +16 -1
  9. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/main_tracer.py +16 -19
  10. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/tool_tracer.py +37 -3
  11. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/model_costs.json +17 -1
  12. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/distributed.py +46 -19
  13. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/tracer.py +2 -2
  14. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1/ragaai_catalyst.egg-info}/PKG-INFO +1 -1
  15. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst.egg-info/SOURCES.txt +1 -0
  16. ragaai_catalyst-2.1.5b0/ragaai_catalyst/dataset.py +0 -273
  17. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  18. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  19. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  20. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/.gitignore +0 -0
  21. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/LICENSE +0 -0
  22. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/README.md +0 -0
  23. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/docs/dataset_management.md +0 -0
  24. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/docs/prompt_management.md +0 -0
  25. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/FinancialAnalysisSystem.ipynb +0 -0
  26. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/TravelPlanner.ipynb +0 -0
  27. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/custom_tracer_example.py +0 -0
  28. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/finance.py +0 -0
  29. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/prompt_management_litellm.ipynb +0 -0
  30. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/prompt_management_openai.ipynb +0 -0
  31. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/travel_agent/agents.py +0 -0
  32. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/travel_agent/config.py +0 -0
  33. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/travel_agent/main.py +0 -0
  34. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/examples/travel_agent/tools.py +0 -0
  35. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/__init__.py +0 -0
  36. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/_version.py +0 -0
  37. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/evaluation.py +0 -0
  38. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/experiment.py +0 -0
  39. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/guard_executor.py +0 -0
  40. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/guardrails_manager.py +0 -0
  41. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/internal_api_completion.py +0 -0
  42. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/prompt_manager.py +0 -0
  43. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/proxy_call.py +0 -0
  44. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/ragaai_catalyst.py +0 -0
  45. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/synthetic_data_generation.py +0 -0
  46. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/__init__.py +0 -0
  47. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/README.md +0 -0
  48. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/__init__.py +0 -0
  49. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/data/__init__.py +0 -0
  50. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/data/data_structure.py +0 -0
  51. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tests/FinancialAnalysisSystem.ipynb +0 -0
  52. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tests/GameActivityEventPlanner.ipynb +0 -0
  53. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tests/TravelPlanner.ipynb +0 -0
  54. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tests/__init__.py +0 -0
  55. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tests/ai_travel_agent.py +0 -0
  56. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tests/unique_decorator_test.py +0 -0
  57. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/__init__.py +0 -0
  58. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/langgraph_tracer.py +0 -0
  59. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/network_tracer.py +0 -0
  60. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/tracers/user_interaction_tracer.py +0 -0
  61. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/upload/__init__.py +0 -0
  62. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/upload/upload_agentic_traces.py +0 -0
  63. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/upload/upload_code.py +0 -0
  64. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/upload/upload_trace_metric.py +0 -0
  65. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/__init__.py +0 -0
  66. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/api_utils.py +0 -0
  67. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/create_dataset_schema.py +0 -0
  68. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/file_name_tracker.py +0 -0
  69. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/generic.py +0 -0
  70. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/get_user_trace_metrics.py +0 -0
  71. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/llm_utils.py +0 -0
  72. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/span_attributes.py +0 -0
  73. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/trace_utils.py +0 -0
  74. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/unique_decorator.py +0 -0
  75. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/agentic_tracing/utils/zip_list_of_unique_files.py +0 -0
  76. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/exporters/__init__.py +0 -0
  77. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/exporters/file_span_exporter.py +0 -0
  78. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/exporters/raga_exporter.py +0 -0
  79. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/instrumentators/__init__.py +0 -0
  80. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/instrumentators/langchain.py +0 -0
  81. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/instrumentators/llamaindex.py +0 -0
  82. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/instrumentators/openai.py +0 -0
  83. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/llamaindex_callback.py +0 -0
  84. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/upload_traces.py +0 -0
  85. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/utils/__init__.py +0 -0
  86. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/tracers/utils/utils.py +0 -0
  87. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst/utils.py +0 -0
  88. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst.egg-info/dependency_links.txt +0 -0
  89. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst.egg-info/requires.txt +0 -0
  90. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/ragaai_catalyst.egg-info/top_level.txt +0 -0
  91. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/requirements.txt +0 -0
  92. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/setup.cfg +0 -0
  93. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/test/test_catalyst/test_configuration.py +0 -0
  94. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/test/test_catalyst/test_dataset.py +0 -0
  95. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/test/test_catalyst/test_evaluation.py +0 -0
  96. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/test/test_catalyst/test_prompt_manager.py +0 -0
  97. {ragaai_catalyst-2.1.5b0 → ragaai_catalyst-2.1.5b1}/test/test_catalyst/test_synthetic_data_generation.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ragaai_catalyst
3
- Version: 2.1.5b0
3
+ Version: 2.1.5b1
4
4
  Summary: RAGA AI CATALYST
5
5
  Author-email: Kiran Scaria <kiran.scaria@raga.ai>, Kedar Gaikwad <kedar.gaikwad@raga.ai>, Dushyant Mahajan <dushyant.mahajan@raga.ai>, Siddhartha Kosti <siddhartha.kosti@raga.ai>, Ritika Goel <ritika.goel@raga.ai>, Vijay Chaurasia <vijay.chaurasia@raga.ai>
6
6
  Requires-Python: <3.13,>=3.9
@@ -0,0 +1,57 @@
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
+ from litellm import completion
6
+ import openai
7
+ from openai import OpenAI
8
+ from ragaai_catalyst.tracers import Tracer
9
+ from ragaai_catalyst import RagaAICatalyst
10
+
11
# Authenticate against RagaAI Catalyst. The credentials and endpoint are read
# from the environment, which load_dotenv() above may have populated from .env.
catalyst = RagaAICatalyst(
    access_key=os.getenv("RAGAAI_CATALYST_ACCESS_KEY"),
    secret_key=os.getenv("RAGAAI_CATALYST_SECRET_KEY"),
    base_url=os.getenv("RAGAAI_CATALYST_BASE_URL"),
)

# Auto-instrumentation switches handed to the tracer: file I/O is enabled,
# user interaction is disabled.
auto_instrumentation_options = {
    "user_interaction": False,
    "file_io": True,
}

# Initialize tracer
tracer = Tracer(
    project_name="alteryx_copilot-tan",
    dataset_name="testing-1",
    tracer_type="Agentic",
    auto_instrumentation=auto_instrumentation_options,
)

# Tracing begins at import time, before the traced tool below is defined.
tracer.start()
29
@tracer.trace_tool(name="llm_call")
def llm_call(prompt, max_tokens=512, model="gpt-3.5-turbo"):
    """Send a single-turn chat prompt to OpenAI and return the stripped reply.

    Before the request is made the function echoes its arguments, waits for
    the user to press Enter and writes the literal text "test" to
    ``response.txt`` in the working directory (presumably so the tracer's
    user-interaction and file-I/O instrumentation has something to observe).

    Args:
        prompt: Text sent as the only user message.
        max_tokens: Upper bound on the completion length.
        model: Name of the chat model to call.

    Returns:
        The content of the first choice with surrounding whitespace removed.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
        Exception: Any error raised by the OpenAI call is printed and re-raised.
    """
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

    print(f"Prompt: {prompt}")
    print(f"Max Tokens: {max_tokens}")
    print(f"Model: {model}")
    input("Press Enter to continue...")

    with open("response.txt", "w") as f:
        f.write("test")

    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
        "temperature": 0.7,
    }
    try:
        reply = client.chat.completions.create(**request_kwargs)
        first_choice = reply.choices[0]
        return first_choice.message.content.strip()
    except Exception as e:
        print(f"Error in llm_call: {str(e)}")
        raise
49
+
50
+
51
def main():
    """Run one sample prompt through the traced ``llm_call`` and print the reply."""
    response = llm_call("how are you?")
    print(f"Response: {response}")


if __name__ == "__main__":
    try:
        main()
    finally:
        # The tracer was started at import time; stop it even when main()
        # raises so a failed run does not leave it running.
        tracer.stop()
@@ -9,7 +9,7 @@ readme = "README.md"
9
9
  requires-python = ">=3.9,<3.13"
10
10
  # license = {file = "LICENSE"}
11
11
 
12
- version = "2.1.5.beta.0"
12
+ version = "2.1.5.beta.1"
13
13
  authors = [
14
14
  {name = "Kiran Scaria", email = "kiran.scaria@raga.ai"},
15
15
  {name = "Kedar Gaikwad", email = "kedar.gaikwad@raga.ai"},
@@ -0,0 +1,603 @@
1
+ import os
2
+ import json
3
+ import requests
4
+ from .utils import response_checker
5
+ from typing import Union
6
+ import logging
7
+ from .ragaai_catalyst import RagaAICatalyst
8
+ import pandas as pd
9
+ logger = logging.getLogger(__name__)
10
+ get_token = RagaAICatalyst.get_token
11
+
12
+
13
class Dataset:
    """Client for the dataset endpoints of one RagaAI Catalyst project.

    Creating an instance resolves ``project_name`` to its project id through
    the projects endpoint. Every request authenticates with the bearer token
    read from the ``RAGAAI_CATALYST_TOKEN`` environment variable at call time.
    """

    # Base URL of the Catalyst API; overwritten with RagaAICatalyst.BASE_URL
    # whenever an instance is created.
    BASE_URL = None
    # Timeout in seconds passed to every HTTP request made by this class.
    TIMEOUT = 30
    # Page size sent to the dataset-listing endpoint.
    # NOTE(review): only page "0" is ever requested, so datasets beyond the
    # first DATASET_PAGE_SIZE entries are presumably invisible to every name
    # lookup in this class -- confirm against the API's paging behaviour.
    DATASET_PAGE_SIZE = 12

    def __init__(self, project_name):
        """Look up ``project_name`` and remember its id.

        Args:
            project_name (str): Name of an existing Catalyst project.

        Raises:
            ValueError: If no project with that name is returned.
            requests.exceptions.RequestException: If the projects request fails.
        """
        self.project_name = project_name
        self.num_projects = 99999
        Dataset.BASE_URL = RagaAICatalyst.BASE_URL
        headers = {
            "Authorization": f'Bearer {os.getenv("RAGAAI_CATALYST_TOKEN")}',
        }
        try:
            response = requests.get(
                f"{Dataset.BASE_URL}/v2/llm/projects?size={self.num_projects}",
                headers=headers,
                timeout=self.TIMEOUT,
            )
            response.raise_for_status()
            logger.debug("Projects list retrieved successfully")
            # Parse the body once; it is needed for both the name check and
            # the id lookup.
            projects = response.json()["data"]["content"]
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to retrieve projects list: {e}")
            raise

        matching_ids = [
            project["id"] for project in projects if project["name"] == project_name
        ]
        if not matching_ids:
            raise ValueError("Project not found. Please enter a valid project name")
        self.project_id = matching_ids[0]

    # ------------------------------------------------------------------
    # Private helpers shared by the public methods
    # ------------------------------------------------------------------

    def _auth_headers(self, json_body=False):
        """Return headers carrying the bearer token and this project's id."""
        headers = {
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Id": str(self.project_id),
        }
        if json_body:
            headers["Content-Type"] = "application/json"
        return headers

    def _post_dataset_page(self):
        """POST the dataset-listing request and return the raw response."""
        payload = {
            "size": self.DATASET_PAGE_SIZE,
            "page": "0",
            "projectId": str(self.project_id),
            "search": "",
        }
        return requests.post(
            f"{Dataset.BASE_URL}/v2/llm/dataset",
            headers=self._auth_headers(json_body=True),
            json=payload,
            timeout=Dataset.TIMEOUT,
        )

    def _get_dataset_id(self, dataset_name):
        """Return the id of the listed dataset named ``dataset_name``, or None."""
        response = self._post_dataset_page()
        response.raise_for_status()
        datasets = response.json()["data"]["content"]
        return next(
            (dataset["id"] for dataset in datasets if dataset["name"] == dataset_name),
            None,
        )

    def _fetch_dataset_details(self, dataset_id):
        """GET the detail record of one dataset and return the parsed JSON body."""
        response = requests.get(
            f"{Dataset.BASE_URL}/v2/llm/dataset/{dataset_id}?initialCols=0",
            headers=self._auth_headers(json_body=True),
            timeout=Dataset.TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def _get_presigned_url(self):
        """Request an upload slot and return ``(presigned_url, file_name)``."""
        try:
            response = requests.get(
                f"{Dataset.BASE_URL}/v2/llm/dataset/csv/presigned-url",
                headers=self._auth_headers(),
                timeout=Dataset.TIMEOUT,
            )
            response.raise_for_status()
            body = response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to get presigned URL: {e}")
            raise

        if not body.get('success'):
            message = 'Unable to fetch presignedUrl'
            logger.error(f"Error in get_presignedUrl: {message}")
            raise ValueError(message)
        return body['data']['presignedUrl'], body['data']['fileName']

    def _put_csv(self, csv_path, url):
        """Upload the file at ``csv_path`` to the presigned ``url``."""
        headers = {
            'Content-Type': 'text/csv',
            'x-ms-blob-type': 'BlockBlob',
        }
        try:
            with open(csv_path, 'rb') as file:
                response = requests.put(
                    url,
                    headers=headers,
                    data=file,
                    timeout=Dataset.TIMEOUT,
                )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to put CSV to presigned URL: {e}")
            raise
        except OSError as e:
            # The CSV could not be opened or read from disk.
            logger.error(f"Error in put_csv_to_presignedUrl: {e}")
            raise

        if response.status_code not in (200, 201):
            message = 'Unable to put csv to the presignedUrl'
            logger.error(f"Error in put_csv_to_presignedUrl: {message}")
            raise ValueError(message)
        return response

    def _submit_csv(self, payload, default_error):
        """POST an upload descriptor to the CSV endpoint and return the JSON body.

        A 400 response is turned into a ValueError carrying the server's
        ``message`` (or ``default_error`` when the body has none).
        """
        try:
            response = requests.post(
                f"{Dataset.BASE_URL}/v2/llm/dataset/csv",
                headers=self._auth_headers(json_body=True),
                json=payload,
                timeout=Dataset.TIMEOUT,
            )
            if response.status_code == 400:
                raise ValueError(response.json().get("message", default_error))
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to upload CSV to elastic: {e}")
            raise

    @staticmethod
    def _coerce_parameter_value(value, param_type):
        """Convert a model-parameter value to the Python type named by ``param_type``.

        ``None`` is passed through untouched. For ``"bool"``, the strings
        "false", "0", "no", "off" and "" (case-insensitive) map to False;
        a plain ``bool(value)`` would report every non-empty string as True.

        Raises:
            ValueError: If ``param_type`` is not float, int, bool or string.
        """
        if value is None:
            return None
        if param_type == "bool":
            if isinstance(value, str):
                return value.strip().lower() not in ("", "false", "0", "no", "off")
            return bool(value)
        converters = {"float": float, "int": int, "string": str}
        if param_type not in converters:
            raise ValueError(f"Unsupported parameter type: {param_type}")
        return converters[param_type](value)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def list_datasets(self):
        """Retrieve the names of the datasets listed for this project.

        A 401 response triggers one token refresh via ``get_token()`` followed
        by a single retry; the refresh was unreachable while the status check
        ran before the 401 test.

        Returns:
            list: Dataset names. If the final response passes
            ``raise_for_status`` but is not a 200, a dict with
            ``status_code`` and ``message`` is returned instead.

        Raises:
            requests.exceptions.RequestException: If the request fails.
        """
        try:
            response = self._post_dataset_page()
            if response.status_code == 401:
                get_token()  # Fetch a new token and set it in the environment
                response = self._post_dataset_page()  # Retry the request
            response.raise_for_status()
            response_checker(response, "Dataset.list_datasets")
            if response.status_code != 200:
                return {
                    "status_code": response.status_code,
                    "message": response.json(),
                }
            return [dataset["name"] for dataset in response.json()["data"]["content"]]
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to list datasets: {e}")
            raise
        except Exception as e:
            logger.error(f"Error in list_datasets: {e}")
            raise

    def get_schema_mapping(self):
        """Fetch the schema elements available for mapping CSV columns.

        Returns:
            The ``schemaElements`` value of the response body.

        Raises:
            ValueError: If the response does not report success.
            requests.exceptions.RequestException: If the request fails.
        """
        headers = {
            "Authorization": f"Bearer {os.getenv('RAGAAI_CATALYST_TOKEN')}",
            "X-Project-Name": self.project_name,
        }
        try:
            response = requests.get(
                f"{Dataset.BASE_URL}/v1/llm/schema-elements",
                headers=headers,
                timeout=Dataset.TIMEOUT,
            )
            response.raise_for_status()
            body = response.json()
            # Check the success flag before touching "data": an unsuccessful
            # body is not guaranteed to contain it.
            if not body.get('success'):
                raise ValueError('Unable to fetch Schema Elements for the CSV')
            return body["data"]["schemaElements"]
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to get CSV schema: {e}")
            raise

    ###################### CSV Upload APIs ###################

    def get_dataset_columns(self, dataset_name):
        """Return the display names of a dataset's columns.

        Columns whose display name starts with an underscore are omitted.

        Args:
            dataset_name (str): Name of an existing dataset in this project.

        Returns:
            list: Column display names.

        Raises:
            ValueError: If the dataset is not listed or the detail response
                does not report success.
            requests.exceptions.RequestException: If a request fails.
        """
        missing_message = (
            f"Dataset {dataset_name} does not exists. Please enter a valid dataset name"
        )
        list_dataset = self.list_datasets()
        if dataset_name not in list_dataset:
            raise ValueError(missing_message)

        try:
            dataset_id = self._get_dataset_id(dataset_name)
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to list datasets: {e}")
            raise
        if dataset_id is None:
            # The dataset vanished between the two listing calls.
            raise ValueError(missing_message)

        try:
            body = self._fetch_dataset_details(dataset_id)
            if not body.get('success'):
                raise ValueError('Unable to fetch details of for the CSV')
            display_names = [
                item["displayName"] for item in body["data"]["datasetColumnsResponses"]
            ]
            return [name for name in display_names if not name.startswith('_')]
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to get CSV columns: {e}")
            raise

    def create_from_csv(self, csv_path, dataset_name, schema_mapping):
        """Create a new dataset from a CSV file.

        The file is uploaded to a presigned URL and then registered with an
        "insert" request. The server's message is printed on success.

        Args:
            csv_path (str): Path of the CSV file to upload.
            dataset_name (str): Name for the new dataset; must not be listed yet.
            schema_mapping (dict): Maps each CSV column name to its schema
                element (column type).

        Raises:
            ValueError: If the name is already taken or any step reports failure.
            requests.exceptions.RequestException: If a request fails.
        """
        list_dataset = self.list_datasets()
        if dataset_name in list_dataset:
            raise ValueError(f"Dataset name {dataset_name} already exists. Please enter a unique dataset name")

        url, filename = self._get_presigned_url()
        put_csv_response = self._put_csv(csv_path, url)
        logger.debug(f"CSV upload response: {put_csv_response}")

        try:
            data = {
                "projectId": str(self.project_id),
                "datasetName": dataset_name,
                "fileName": filename,
                "schemaMapping": {
                    column: {"columnType": schema_element}
                    for column, schema_element in schema_mapping.items()
                },
                "opType": "insert",
                "description": "",
            }
            upload_csv_response = self._submit_csv(data, 'Unable to upload csv')
            if not upload_csv_response['success']:
                raise ValueError('Unable to upload csv')
            print(upload_csv_response['message'])
        except Exception as e:
            logger.error(f"Error in create_from_csv: {e}")
            raise

    def add_rows(self, csv_path, dataset_name):
        """Add rows to an existing dataset from a CSV file.

        Args:
            csv_path (str): Path to the CSV file to be added.
            dataset_name (str): Name of the existing dataset to add rows to.

        Returns:
            dict: Parsed body of the upload response.

        Raises:
            ValueError: If the dataset does not exist, the CSV cannot be read
                or the server reports failure.
            requests.exceptions.RequestException: If a request fails.
        """
        # Get existing dataset columns (also validates that the dataset exists)
        existing_columns = self.get_dataset_columns(dataset_name)

        # Read the CSV file to check columns
        try:
            csv_columns = pd.read_csv(csv_path).columns.tolist()
        except Exception as e:
            logger.error(f"Failed to read CSV file: {e}")
            raise ValueError(f"Unable to read CSV file: {e}") from e

        # NOTE(review): the file at csv_path is uploaded unchanged, so columns
        # missing from it are not filled in client-side (the previous code
        # added them to an in-memory DataFrame that was never uploaded).
        # Confirm whether the server tolerates missing columns.
        missing_columns = [col for col in existing_columns if col not in csv_columns]
        if missing_columns:
            logger.warning(
                f"CSV is missing columns present in dataset {dataset_name}: {missing_columns}"
            )

        url, filename = self._get_presigned_url()
        self._put_csv(csv_path, url)

        # Prepare schema mapping (assuming same mapping as original dataset)
        try:
            dataset_id = self._get_dataset_id(dataset_name)
            if dataset_id is None:
                raise ValueError(f"Dataset {dataset_name} not found")
            details = self._fetch_dataset_details(dataset_id)
            schema_mapping = {
                col["displayName"]: {"columnType": col["columnType"]}
                for col in details["data"]["datasetColumnsResponses"]
            }
        except requests.exceptions.RequestException as e:
            logger.error(f"Failed to get schema mapping: {e}")
            raise

        # Upload CSV to elastic
        try:
            data = {
                "projectId": str(self.project_id),
                "datasetName": dataset_name,
                "fileName": filename,
                "schemaMapping": schema_mapping,
                "opType": "update",  # Use update for adding rows
                "description": "Adding new rows to dataset",
            }
            response_data = self._submit_csv(data, "Failed to add rows")
            if not response_data.get('success', False):
                raise ValueError(response_data.get('message', 'Unknown error occurred'))

            print(f"Successfully added rows to dataset {dataset_name}")
            return response_data
        except Exception as e:
            logger.error(f"Error in add_rows_to_dataset: {e}")
            raise

    def add_columns(self, text_fields, dataset_name, column_name, provider, model, variables=None):
        """Add a prompt-generated column to a dataset.

        The model's parameter list is fetched from the playground endpoint,
        coerced to the declared types and sent with the prompt template to
        the add-column endpoint.

        Args:
            text_fields (list): Prompt messages; each a dict with 'role' and
                'content' keys.
            dataset_name (str): Name of the dataset receiving the column.
            column_name (str): Name of the new column.
            provider (str): Name of the model provider.
            model (str): Name of the model.
            variables (dict, optional): Prompt variables; each key/value pair
                becomes one entry of ``variableSpecs``. Defaults to no
                variables.

        Returns:
            dict: Parsed body of the add-column response.

        Raises:
            ValueError: If ``text_fields`` is malformed, the dataset is not
                listed or a parameter has an unsupported type.
            requests.exceptions.RequestException: If a request fails.
        """
        # Validate text_fields input
        if not isinstance(text_fields, list):
            raise ValueError("text_fields must be a list of dictionaries")

        for field in text_fields:
            if not isinstance(field, dict) or 'role' not in field or 'content' not in field:
                raise ValueError("Each text field must be a dictionary with 'role' and 'content' keys")

        # A fresh dict per call; a shared ``{}`` default would leak state
        # between calls if it were ever mutated.
        variables = {} if variables is None else variables

        try:
            dataset_id = self._get_dataset_id(dataset_name)
            if dataset_id is None:
                raise ValueError(f"Dataset {dataset_name} not found")

            # Fetch model parameters
            params_response = requests.post(
                f"{Dataset.BASE_URL}/playground/providers/models/parameters/list",
                headers=self._auth_headers(json_body=True),
                json={"providerName": provider, "modelName": model},
                timeout=Dataset.TIMEOUT,
            )
            params_response.raise_for_status()

            # Transform parameters for the add-column API
            formatted_parameters = []
            for param in params_response.json().get('data') or []:
                param_type = param.get('type')
                formatted_parameters.append({
                    "name": param.get('name'),
                    "value": self._coerce_parameter_value(param.get('value'), param_type),
                    "type": param_type,
                })

            # Prepare payload for add column API
            add_column_payload = {
                "rowFilterList": [],
                "columnName": column_name,
                "datasetId": dataset_id,
                "variables": variables,
                "promptTemplate": {
                    "textFields": text_fields,
                    "modelSpecs": {
                        "model": f"{provider}/{model}",
                        "parameters": formatted_parameters,
                    },
                },
            }
            if variables:
                add_column_payload["promptTemplate"]["variableSpecs"] = [
                    {"name": key, "type": "string", "schema": values}
                    for key, values in variables.items()
                ]

            # Make API call to add column
            response = requests.post(
                f"{Dataset.BASE_URL}/v2/llm/dataset/add-column",
                headers=self._auth_headers(json_body=True),
                json=add_column_payload,
                timeout=Dataset.TIMEOUT,
            )
            response.raise_for_status()
            response_data = response.json()

            print("Column added successfully:")
            print(json.dumps(response_data, indent=2))
            return response_data

        except requests.exceptions.RequestException as e:
            logger.error(f"Error adding column: {e}")
            raise
603
+