aiqa-client 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,176 @@
1
+ """
2
+ Example usage of the ExperimentRunner class.
3
+ """
4
+
5
+ import asyncio
6
+ import os
7
+ from dotenv import load_dotenv
8
+ from aiqa import ExperimentRunner
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+
13
+
14
+ # A dummy test engine that returns a dummy response
15
async def my_engine(input_data):
    """
    Dummy engine that stands in for a real model/API call.

    Pauses for a random delay of roughly 0.5 to 1 seconds, then returns a
    dict shaped like an OpenAI chat response whose message content is
    ``"hello {input_data}"``.

    Note: For run(), the engine only takes input_data.
    For run_example(), you can use an engine that takes (input_data, parameters).
    """
    import random

    # Simulated latency: random.random() lies in [0, 1), so the delay
    # falls in [0.5, 1.0) seconds.
    jitter = random.random() * 0.5
    await asyncio.sleep(jitter + 0.5)

    # Imitate the nesting of an OpenAI API response.
    message = {"content": f"hello {input_data}"}
    first_choice = {"message": message}
    return {"choices": [first_choice]}
36
+
37
+
38
async def scorer(output, example):
    """
    Placeholder scorer: accepts an output and its example, returns no scores.

    In a real scenario, you would use the metrics from the dataset and call
    the matching scoring functions here.

    Note: For run(), the scorer only takes (output, example).
    For run_example(), you can use a scorer that takes (output, example, parameters).
    """
    # Add your scoring logic here; until then an empty mapping is returned.
    return {}
50
+
51
+
52
async def example_basic_usage():
    """
    Basic example of using ExperimentRunner.

    Fetches the dataset and lists its metrics, runs every example input
    through ``my_engine`` with a scorer that delegates to ``scorer``, prints
    the per-example result, and finally prints the summary results.
    """
    if not os.getenv("AIQA_API_KEY"):
        # Only warn: the example is still attempted without the key.
        print("Warning: AIQA_API_KEY environment variable is not set. Example may fail.")

    dataset_id = "your-dataset-id-here"
    organisation_id = "your-organisation-id-here"

    experiment_runner = ExperimentRunner(
        dataset_id=dataset_id,
        organisation_id=organisation_id,
    )

    # Get metrics from the dataset
    dataset = experiment_runner.get_dataset()
    metrics = dataset.get("metrics", [])
    print(f"Found {len(metrics)} metrics in dataset: {[m['name'] for m in metrics]}")

    # Create scorer that scores all metrics from the dataset
    # (In practice, you'd implement this based on your metrics)
    async def dataset_scorer(output, example):
        # This is a placeholder - implement based on your actual metrics
        return await scorer(output, example)

    # Get example inputs
    example_inputs = experiment_runner.get_example_inputs()
    print(f"Processing {len(example_inputs)} examples")

    # Run experiments on each example
    for example in example_inputs:
        result = await experiment_runner.run_example(example, my_engine, dataset_scorer)
        # Plain truthiness covers None and empty containers alike, and does
        # not require the result to support len().
        if result:
            print(f"Scored example {example['id']}: {result}")
        else:
            print(f"No results for example {example['id']}")

    # Get summary results
    summary_results = experiment_runner.get_summary_results()
    print(f"Summary results: {summary_results}")
94
+
95
+
96
async def example_with_experiment_setup():
    """
    Create an experiment with explicit settings, then run it.

    The experiment is given a name, a base set of parameters, and two
    alternative temperature settings under ``comparison_parameters``.
    """
    runner = ExperimentRunner(
        dataset_id="your-dataset-id-here",
        organisation_id="your-organisation-id-here",
    )

    # Base parameters plus the variants to compare against them.
    base_parameters = {"model": "gpt-4", "temperature": 0.7}
    variants = [{"temperature": 0.5}, {"temperature": 0.9}]
    experiment_spec = {
        "name": "My Custom Experiment",
        "parameters": base_parameters,
        "comparison_parameters": variants,
    }

    created = runner.create_experiment(experiment_spec)
    print(f"Created experiment: {created['id']}")

    # Now run the experiment with the module-level engine and scorer.
    await runner.run(my_engine, scorer)
127
+
128
+
129
async def example_stepwise():
    """
    Run the experiment one example at a time (more control).

    Each example runs inside its own try/except, so a failure is printed
    and the loop carries on with the remaining examples.
    """
    runner = ExperimentRunner(
        dataset_id="your-dataset-id-here",
        organisation_id="your-organisation-id-here",
    )

    # Report how many metrics the dataset declares.
    metric_list = runner.get_dataset().get("metrics", [])
    print(f"Found {len(metric_list)} metrics in dataset")

    async def my_scorer(output, example, parameters):
        # Three-argument scorer: run_example() passes parameters, so a real
        # implementation could make use of them.
        return {"score": 0.8}  # Placeholder

    # Fetch the examples to process (limit=100 is passed to the runner).
    batch = runner.get_example_inputs(limit=100)
    print(f"Processing {len(batch)} examples")

    for example in batch:
        try:
            outcome = await runner.run_example(example, my_engine, my_scorer)
            print(f"Example {example['id']} completed: {outcome}")
        except Exception as e:
            # Report the failure and move on to the next example.
            print(f"Example {example['id']} failed: {e}")

    # Get final summary
    print(f"Final summary: {runner.get_summary_results()}")
167
+
168
+
169
if __name__ == "__main__":
    # Uncomment the example you want to run:
    # asyncio.run(example_basic_usage())
    # asyncio.run(example_with_experiment_setup())
    # asyncio.run(example_stepwise())

    # With nothing uncommented, the script only prints these two hints.
    for hint in (
        "Please uncomment one of the examples above to run it.",
        "Make sure to set your dataset_id and organisation_id in the example functions.",
    ):
        print(hint)
176
+
aiqa/test_tracing.py ADDED
@@ -0,0 +1,230 @@
1
+ """
2
+ Unit tests for tracing.py functions.
3
+ """
4
+
5
+ import os
6
+ import pytest
7
+ from unittest.mock import patch, MagicMock
8
+ from aiqa.tracing import get_span
9
+
10
+
11
class TestGetSpan:
    """Tests for get_span function."""

    # Environment with server URL, API key and organisation ID all set.
    _ENV_ALL = {
        "AIQA_SERVER_URL": "http://localhost:3000",
        "AIQA_API_KEY": "test-api-key",
        "AIQA_ORGANISATION_ID": "test-org",
    }

    @classmethod
    def _env_without(cls, missing_key):
        """Return a copy of the full environment with one variable left out."""
        return {k: v for k, v in cls._ENV_ALL.items() if k != missing_key}

    @staticmethod
    def _response(status_code, payload=None):
        """Build a MagicMock response with the given status and optional JSON body."""
        fake = MagicMock()
        fake.status_code = status_code
        if payload is not None:
            fake.json.return_value = payload
        return fake

    def test_get_span_success_with_span_id(self):
        """Test successful retrieval of span using spanId query."""
        expected = {
            "id": "test-span-123",
            "name": "test_span",
            "trace_id": "abc123",
            "attributes": {"key": "value"},
        }
        env = patch.dict(os.environ, self._ENV_ALL)
        with env, patch("requests.get") as mock_get:
            mock_get.return_value = self._response(200, {"hits": [expected]})

            result = get_span("test-span-123")

            assert result == expected
            mock_get.assert_called_once()
            # call_args unpacks into (positional args, keyword args)
            positional, keyword = mock_get.call_args
            assert positional[0] == "http://localhost:3000/span"
            assert "q" in keyword["params"]
            assert keyword["params"]["q"] == "spanId:test-span-123"

    def test_get_span_success_with_client_span_id(self):
        """Test successful retrieval of span using clientSpanId query when spanId fails."""
        expected = {
            "id": "test-span-123",
            "name": "test_span",
            "trace_id": "abc123",
        }
        env = patch.dict(os.environ, self._ENV_ALL)
        with env, patch("requests.get") as mock_get:
            # First call returns 404 (spanId not found), second call succeeds (clientSpanId)
            mock_get.side_effect = [
                self._response(404),
                self._response(200, {"hits": [expected]}),
            ]

            result = get_span("test-span-123")

            assert result == expected
            assert mock_get.call_count == 2
            # Check that second call uses clientSpanId
            _, second_keyword = mock_get.call_args_list[1]
            assert second_keyword["params"]["q"] == "clientSpanId:test-span-123"

    def test_get_span_not_found(self):
        """Test that get_span returns None when span is not found."""
        env = patch.dict(os.environ, self._ENV_ALL)
        with env, patch("requests.get") as mock_get:
            # Both queries return 404
            mock_get.return_value = self._response(404)

            result = get_span("nonexistent-span")

            assert result is None
            assert mock_get.call_count == 2

    def test_get_span_empty_hits(self):
        """Test that get_span returns None when hits array is empty."""
        env = patch.dict(os.environ, self._ENV_ALL)
        with env, patch("requests.get") as mock_get:
            mock_get.return_value = self._response(200, {"hits": []})

            result = get_span("test-span-123")

            assert result is None

    def test_get_span_missing_server_url(self):
        """Test that get_span raises ValueError when AIQA_SERVER_URL is not set."""
        empty_env = patch.dict(os.environ, {}, clear=True)
        with empty_env:
            with pytest.raises(ValueError, match="AIQA_SERVER_URL is not set"):
                get_span("test-span-123")

    def test_get_span_missing_organisation_id(self):
        """Test that get_span raises ValueError when organisation ID is not provided."""
        env = patch.dict(
            os.environ,
            self._env_without("AIQA_ORGANISATION_ID"),
            clear=True,
        )
        with env:
            with pytest.raises(ValueError, match="Organisation ID is required"):
                get_span("test-span-123")

    def test_get_span_missing_api_key(self):
        """Test that get_span raises ValueError when AIQA_API_KEY is not set."""
        env = patch.dict(
            os.environ,
            self._env_without("AIQA_API_KEY"),
            clear=True,
        )
        with env:
            with pytest.raises(ValueError, match="API key is required"):
                get_span("test-span-123")

    def test_get_span_with_organisation_id_parameter(self):
        """Test that get_span uses organisation_id parameter when provided."""
        expected = {"id": "test-span-123", "name": "test_span"}
        # The organisation ID is absent from the environment here, so it
        # can only come from the keyword argument.
        env = patch.dict(
            os.environ,
            self._env_without("AIQA_ORGANISATION_ID"),
            clear=True,
        )
        with env, patch("requests.get") as mock_get:
            mock_get.return_value = self._response(200, {"hits": [expected]})

            result = get_span("test-span-123", organisation_id="param-org")

            assert result == expected
            _, keyword = mock_get.call_args
            assert keyword["params"]["organisation"] == "param-org"

    def test_get_span_server_error(self):
        """Test that get_span raises ValueError on server error."""
        env = patch.dict(os.environ, self._ENV_ALL)
        with env, patch("requests.get") as mock_get:
            failing = self._response(500)
            failing.text = "Internal Server Error"
            mock_get.return_value = failing

            with pytest.raises(ValueError, match="Failed to get span: 500"):
                get_span("test-span-123")

    def test_get_span_authorization_header(self):
        """Test that get_span includes Authorization header with API key."""
        expected = {"id": "test-span-123"}
        env = patch.dict(
            os.environ,
            {**self._ENV_ALL, "AIQA_API_KEY": "test-api-key-123"},
        )
        with env, patch("requests.get") as mock_get:
            mock_get.return_value = self._response(200, {"hits": [expected]})

            get_span("test-span-123")

            _, keyword = mock_get.call_args
            assert keyword["headers"]["Authorization"] == "ApiKey test-api-key-123"