levelapp 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. levelapp/__init__.py +0 -0
  2. levelapp/aspects/__init__.py +8 -0
  3. levelapp/aspects/loader.py +253 -0
  4. levelapp/aspects/logger.py +59 -0
  5. levelapp/aspects/monitor.py +617 -0
  6. levelapp/aspects/sanitizer.py +168 -0
  7. levelapp/clients/__init__.py +122 -0
  8. levelapp/clients/anthropic.py +112 -0
  9. levelapp/clients/gemini.py +130 -0
  10. levelapp/clients/groq.py +101 -0
  11. levelapp/clients/huggingface.py +162 -0
  12. levelapp/clients/ionos.py +126 -0
  13. levelapp/clients/mistral.py +106 -0
  14. levelapp/clients/openai.py +116 -0
  15. levelapp/comparator/__init__.py +5 -0
  16. levelapp/comparator/comparator.py +232 -0
  17. levelapp/comparator/extractor.py +108 -0
  18. levelapp/comparator/schemas.py +61 -0
  19. levelapp/comparator/scorer.py +269 -0
  20. levelapp/comparator/utils.py +136 -0
  21. levelapp/config/__init__.py +5 -0
  22. levelapp/config/endpoint.py +199 -0
  23. levelapp/config/prompts.py +57 -0
  24. levelapp/core/__init__.py +0 -0
  25. levelapp/core/base.py +386 -0
  26. levelapp/core/schemas.py +24 -0
  27. levelapp/core/session.py +336 -0
  28. levelapp/endpoint/__init__.py +0 -0
  29. levelapp/endpoint/client.py +188 -0
  30. levelapp/endpoint/client_test.py +41 -0
  31. levelapp/endpoint/manager.py +114 -0
  32. levelapp/endpoint/parsers.py +119 -0
  33. levelapp/endpoint/schemas.py +38 -0
  34. levelapp/endpoint/tester.py +52 -0
  35. levelapp/evaluator/__init__.py +3 -0
  36. levelapp/evaluator/evaluator.py +307 -0
  37. levelapp/metrics/__init__.py +63 -0
  38. levelapp/metrics/embedding.py +56 -0
  39. levelapp/metrics/embeddings/__init__.py +0 -0
  40. levelapp/metrics/embeddings/sentence_transformer.py +30 -0
  41. levelapp/metrics/embeddings/torch_based.py +56 -0
  42. levelapp/metrics/exact.py +182 -0
  43. levelapp/metrics/fuzzy.py +80 -0
  44. levelapp/metrics/token.py +103 -0
  45. levelapp/plugins/__init__.py +0 -0
  46. levelapp/repository/__init__.py +3 -0
  47. levelapp/repository/filesystem.py +203 -0
  48. levelapp/repository/firestore.py +291 -0
  49. levelapp/simulator/__init__.py +3 -0
  50. levelapp/simulator/schemas.py +116 -0
  51. levelapp/simulator/simulator.py +531 -0
  52. levelapp/simulator/utils.py +134 -0
  53. levelapp/visualization/__init__.py +7 -0
  54. levelapp/visualization/charts.py +358 -0
  55. levelapp/visualization/dashboard.py +240 -0
  56. levelapp/visualization/exporter.py +167 -0
  57. levelapp/visualization/templates/base.html +158 -0
  58. levelapp/visualization/templates/comparator_dashboard.html +57 -0
  59. levelapp/visualization/templates/simulator_dashboard.html +111 -0
  60. levelapp/workflow/__init__.py +6 -0
  61. levelapp/workflow/base.py +192 -0
  62. levelapp/workflow/config.py +96 -0
  63. levelapp/workflow/context.py +64 -0
  64. levelapp/workflow/factory.py +42 -0
  65. levelapp/workflow/registration.py +6 -0
  66. levelapp/workflow/runtime.py +19 -0
  67. levelapp-0.1.15.dist-info/METADATA +571 -0
  68. levelapp-0.1.15.dist-info/RECORD +70 -0
  69. levelapp-0.1.15.dist-info/WHEEL +4 -0
  70. levelapp-0.1.15.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,291 @@
1
+ """levelapp/repository/firestore.py"""
2
+ from typing import List, Dict, Any, Type, TYPE_CHECKING
3
+ from pydantic import ValidationError
4
+
5
+ from google.cloud import firestore_v1
6
+ from google.cloud.firestore_v1 import DocumentSnapshot
7
+ from google.api_core.exceptions import ClientError, ServerError, NotFound, InvalidArgument, DeadlineExceeded
8
+ from google.auth.exceptions import DefaultCredentialsError
9
+
10
+ from levelapp.core.base import BaseRepository, Model
11
+ from levelapp.aspects import logger
12
+
13
+
14
+ if TYPE_CHECKING:
15
+ from levelapp.workflow.config import WorkflowConfig
16
+
17
+
18
class FirestoreRepository(BaseRepository):
    """
    Firestore implementation of BaseRepository.

    Every document is addressed through the hierarchical path
    {collection_id}/{section_id}/{sub_collection_id}/{document_id}.

    The data-access methods never raise: failures are logged and a neutral
    value (None, an empty list or False) is returned instead.
    """

    def __init__(self, config: "WorkflowConfig | None"):
        """
        Stores the connection settings; no network call is made here.

        Args:
            config: Workflow configuration. When provided, the project ID and
                database name are read from `config.repository`; otherwise the
                project ID is left unset and the '(default)' database is used.
        """
        if config:
            self.config = config
            self.project_id: str | Any = config.repository.project_id
            self.database_name: str | Any = config.repository.database_name
        else:
            self.project_id = None
            self.database_name = '(default)'

        # Populated by connect(); stays None until a connection succeeds.
        self.client: firestore_v1.Client | None = None

    def connect(self) -> None:
        """
        Connects to Firestore, prioritizing the project ID passed to the constructor.

        Credentials come from `google.auth.default()`. Client, server,
        credential and value errors are logged and not re-raised.
        """
        try:
            import google.auth
            credentials, default_project_id = google.auth.default()

            if not credentials:
                raise ValueError(
                    "Failed to obtain credentials. "
                    "Please set GOOGLE_APPLICATION_CREDENTIALS "
                    "or run 'gcloud auth application-default login'."
                )

            # The explicitly configured project wins over the ambient one.
            project_id = self.project_id if self.project_id else default_project_id

            self.client = firestore_v1.Client(
                project=project_id,
                credentials=credentials,
                database=self.database_name
            )

            if not self.client:
                raise ValueError("Failed to initialize Firestore client")

            logger.info(
                f"Successfully connected to Firestore. "
                f"Project: '{self.client.project}', "
                f"Scope: '{self.client.SCOPE}'"
            )

        except (ClientError, ServerError, DefaultCredentialsError, ValueError) as e:
            logger.error(f"Failed to initialize Firestore client:\n{e}")

    def close(self) -> None:
        """Closes the Firestore client if one was created."""
        if self.client:
            self.client.close()

    def _document_ref(
            self,
            collection_id: str,
            section_id: str,
            sub_collection_id: str,
            document_id: str
    ):
        """Builds the document reference for the hierarchical path (requires a connected client)."""
        return (
            self.client
            .collection(collection_id)
            .document(section_id)
            .collection(sub_collection_id)
            .document(document_id)
        )

    def retrieve_document(
            self,
            collection_id: str,
            section_id: str,
            sub_collection_id: str,
            document_id: str,
            model_type: Type[Model]
    ) -> Model | None:
        """
        Retrieves a document from Firestore.

        Args:
            collection_id (str): Collection reference.
            section_id (str): Section reference.
            sub_collection_id (str): Sub-collection reference.
            document_id (str): Document reference.
            model_type (Type[Model]): Pydantic model for parsing.

        Returns:
            An instance of the provided Pydantic model, or None when the client
            is not connected, the document does not exist, or any error occurs.
        """
        if not self.client:
            logger.error("Client connection lost")
            return None

        try:
            doc_ref = self._document_ref(collection_id, section_id, sub_collection_id, document_id)
            snapshot: DocumentSnapshot = doc_ref.get()

            if not snapshot.exists:
                logger.warning(f"Document '{document_id}' does not exist in Firestore")
                return None

            return model_type.model_validate(snapshot.to_dict())

        except NotFound as e:
            logger.warning(f"Failed to retrieve Firestore document <ID:{document_id}>:\n{e}")
            return None

        except InvalidArgument as e:
            logger.error(
                f"Invalid argument in document path "
                f"<{collection_id}/{section_id}/{sub_collection_id}/{document_id}>:\n{e}"
            )
            return None

        except DeadlineExceeded as e:
            logger.error(f"Request to retrieve document <ID:{document_id}> timed out:\n{e}")
            return None

        except ValidationError as e:
            logger.exception(f"Failed to parse the retrieved document <ID:{document_id}>:\n{e}")
            return None

        except Exception as e:
            logger.exception(f"Failed to retrieve Firestore document <ID:{document_id}>:\n{e}")
            return None

    def store_document(
            self,
            collection_id: str,
            section_id: str,
            sub_collection_id: str,
            document_id: str,
            data: Model
    ) -> None:
        """
        Stores a document in Firestore.

        Args:
            collection_id (str): Collection reference.
            section_id (str): Section reference.
            sub_collection_id (str): Sub-collection reference.
            document_id (str): Document reference.
            data (Model): An instance of the Pydantic model containing the data.
        """
        if not self.client:
            logger.error("Client connection lost")
            # Stop here: without a client the write below cannot succeed.
            return None

        try:
            doc_ref = self._document_ref(collection_id, section_id, sub_collection_id, document_id)
            payload = data.model_dump()
            doc_ref.set(payload)

        except NotFound as e:
            logger.warning(f"Failed to store Firestore document <ID:{document_id}>:\n{e}")
            return None

        except InvalidArgument as e:
            logger.error(
                f"Invalid argument in document path "
                f"<{collection_id}/{section_id}/{sub_collection_id}/{document_id}>:\n{e}"
            )
            return None

        except DeadlineExceeded as e:
            logger.error(f"Request to store document <ID:{document_id}> timed out:\n{e}")
            return None

        except ValidationError as e:
            logger.exception(f"Failed to serialize document <ID:{document_id}> for storage:\n{e}")
            return None

        except Exception as e:
            logger.exception(f"Failed to store Firestore document <ID:{document_id}>:\n{e}")
            return None

    def query_collection(
            self,
            collection_id: str,
            section_id: str,
            sub_collection_id: str,
            filters: Dict[str, Any],
            model_type: Type[Model]
    ) -> List[Model]:
        """
        Queries a collection with specified filters.

        Args:
            collection_id (str): Collection reference.
            section_id (str): Section reference.
            sub_collection_id (str): Sub-collection reference.
            filters (Dict[str, Any]): A dictionary of key-value pairs to filter the query.
            model_type (Type [Model]): The class to deserialize the documents into.

        Returns:
            A list of deserialized models that match the query. An empty list
            is returned when the client is not connected or any error occurs
            (including a validation failure on a single document).
        """
        if not self.client:
            logger.error("Client connection lost")
            return []

        try:
            # Same {collection}/{section}/{sub_collection} path the other
            # methods use, so queries see the documents store_document wrote.
            query = (
                self.client
                .collection(collection_id)
                .document(section_id)
                .collection(sub_collection_id)
            )

            # Every filter is an equality constraint; they are AND-ed together.
            # NOTE(review): newer google-cloud-firestore releases prefer the
            # `filter=FieldFilter(...)` keyword over positional arguments.
            for key, value in filters.items():
                query = query.where(key, "==", value)

            results = []
            for doc in query.stream():
                payload = doc.to_dict() if doc.exists else None
                if payload:
                    results.append(model_type.model_validate(payload))

            return results

        except NotFound as e:
            logger.warning(f"Collection for user '{collection_id}' not found:\n{e}")
            return []

        except InvalidArgument as e:
            logger.error(f"Invalid query argument for user '{collection_id}':\n{e}")
            return []

        except DeadlineExceeded as e:
            logger.error(f"Query for user '{collection_id}' timed out:\n{e}")
            return []

        except ValidationError as e:
            logger.exception(f"Failed to parse a document from query results:\n{e}")
            return []

        except Exception as e:
            logger.exception(f"An unexpected error occurred during collection query:\n{e}")
            return []

    def delete_document(
            self,
            collection_id: str,
            section_id: str,
            sub_collection_id: str,
            document_id: str
    ) -> bool:
        """
        Deletes a document from Firestore.

        Args:
            collection_id (str): Collection reference.
            section_id (str): Section reference.
            sub_collection_id (str): Sub-collection reference.
            document_id (str): Document reference.

        Returns:
            True if the delete request completed without error, False otherwise.
        """
        if not self.client:
            logger.error("Client connection lost")
            return False

        try:
            doc_ref = self._document_ref(collection_id, section_id, sub_collection_id, document_id)
            doc_ref.delete()
            logger.info(f"Document '{document_id}' deleted successfully.")
            return True

        except NotFound as e:
            logger.warning(f"Failed to delete document. Document '{document_id}' not found:\n{e}")
            return False
        except InvalidArgument as e:
            logger.error(
                f"Invalid argument in document path "
                f"<{collection_id}/{section_id}/{sub_collection_id}/{document_id}>:\n{e}"
            )
            return False
        except DeadlineExceeded as e:
            logger.error(f"Request to delete document <ID:{document_id}> timed out:\n{e}")
            return False
        except Exception as e:
            logger.exception(f"Failed to delete Firestore document <ID:{document_id}>:\n{e}")
            return False
@@ -0,0 +1,3 @@
1
+ from .simulator import ConversationSimulator
2
+
3
+ __all__ = ['ConversationSimulator']
@@ -0,0 +1,116 @@
1
+ """
2
+ levelapp/simulator/schemas.py
3
+
4
+ Defines Pydantic models for simulator-related data structures,
5
+ including test configurations, batch metadata, and evaluation results.
6
+ """
7
+ from enum import Enum
8
+ from uuid import UUID, uuid4
9
+ from datetime import datetime
10
+
11
+ from typing import Dict, Any, List
12
+ from pydantic import BaseModel, Field, computed_field
13
+
14
+ from levelapp.evaluator.evaluator import JudgeEvaluationResults
15
+
16
+
17
class InteractionLevel(str, Enum):
    """Position of an interaction within a conversation.

    Members are also plain strings, so they compare equal to their values.
    """

    INITIAL = "initial"
    INTERMEDIATE = "intermediate"
    FINAL = "final"
22
+
23
+
24
class Interaction(BaseModel):
    """One scripted exchange: a user message paired with its reference reply."""

    # Generated automatically when not supplied.
    id: UUID = Field(
        default_factory=uuid4,
        description="Interaction identifier",
    )

    # Required fields.
    user_message_path: str = Field(
        ...,
        description="Path of the user message in the request payload",
    )
    user_message: str = Field(
        ...,
        description="The user's query message",
    )
    reference_reply: str = Field(
        ...,
        description="The preset reference message",
    )

    # Optional fields with defaults.
    interaction_type: InteractionLevel = Field(
        default=InteractionLevel.INITIAL,
        description="Type of interaction",
    )
    reference_metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Expected metadata",
    )
    guardrail_flag: Any = Field(
        default=False,
        description="Flag for guardrail signaling",
    )
    request_payload: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional request payload",
    )
34
+
35
+
36
class ConversationScript(BaseModel):
    """An ordered list of interactions that together form one conversation."""

    id: UUID = Field(
        default_factory=uuid4,
        description="Conversation identifier",
    )
    interactions: List[Interaction] = Field(
        default_factory=list,
        description="List of interactions",
    )
    description: str = Field(
        default="no-description",
        description="A short description of the conversation",
    )
    details: Dict[str, str] = Field(
        default_factory=dict,
        description="Conversation details",
    )
    variable_request_schema: bool = Field(
        default=False,
        description="The payload schema changes for each request",
    )
    uuid_field: str | None = Field(
        default=None,
        description="field that requires a UUID value",
    )
44
+
45
+
46
class ScriptsBatch(BaseModel):
    """A batch grouping several conversation scripts under one identifier."""

    id: UUID = Field(
        default_factory=uuid4,
        description="Batch identifier",
    )
    scripts: List[ConversationScript] = Field(
        default_factory=list,
        description="List of conversation scripts",
    )
49
+
50
+
51
+ # ---- Interaction Details Models ----
52
class InteractionResults(BaseModel):
    """Reply, metadata and flags extracted from a VLA interaction.

    Every field is optional and falls back to a placeholder default.
    """

    # Placeholder text used when no reply was produced.
    generated_reply: str | None = "No response"
    generated_metadata: Dict[str, Any] | None = {}
    guardrail_flag: Any | None = False
    interaction_type: str | None = ""
58
+
59
+
60
class InteractionEvaluationResults(BaseModel):
    """Evaluation outcome for one interaction."""

    # Judge evaluation results, keyed by string.
    judge_evaluations: Dict[str, JudgeEvaluationResults] | None = Field(
        default_factory=dict,
    )
    # Float scores for the metadata evaluation, keyed by string.
    metadata_evaluation: Dict[str, float] | None = Field(
        default_factory=dict,
    )
    guardrail_flag: int | None = Field(
        default=None,
    )
    errors: Dict[str, Any] | None = Field(
        default_factory=dict,
    )
66
+
67
+
68
class SingleInteractionResults(BaseModel):
    """Full record of one interaction: messages, metadata, evaluation and errors.

    Only `conversation_id` is required; every other field has a default.
    """

    conversation_id: str = Field(
        description="Conversation identifier",
    )

    # Messages exchanged and the reference to compare against.
    user_message: str = Field(
        default="",
        description="The user's query message",
    )
    generated_reply: str = Field(
        default="Interaction request failed",
        description="The generated reply message",
    )
    reference_reply: str = Field(
        default="",
        description="The preset reference message",
    )

    # Extracted versus expected metadata.
    generated_metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Extracted metadata",
    )
    reference_metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Expected metadata",
    )

    guardrail_details: bool | None = Field(
        default=None,
        description="Flag for guardrail signaling",
    )
    evaluation_results: InteractionEvaluationResults = Field(
        default_factory=InteractionEvaluationResults,
    )
    response_content: Dict[str, Any] = Field(
        default_factory=dict,
        description="Response content",
    )
    errors: Dict[str, Any] = Field(
        default_factory=dict,
        description="Captured errors",
    )
80
+
81
+
82
class SingleAttemptResults(BaseModel):
    """Results collected for one attempt at running a script.

    `attempt_id`, `script_id` and `total_duration` default to None, so their
    annotations accept None. This lets a default-constructed instance be
    re-validated from its own dump.
    """

    attempt_nbr: int = Field(default=1, description="The attempt number")
    attempt_id: str | None = Field(default=None, description="The attempt ID")
    script_id: str | None = Field(default=None, description="The script ID")
    total_duration: float | None = Field(default=None, description="Total duration")
    interaction_results: List[SingleInteractionResults] = Field(default_factory=list)
    evaluation_verdicts: Dict[str, List[str]] = Field(default_factory=dict)
    average_scores: Dict[str, float] = Field(default_factory=dict)
    interaction_summaries: List[str] = Field(default_factory=list)
91
+
92
+
93
class AllAttemptsResults(BaseModel):
    """All attempts made for one script, with their average scores.

    `script_id` defaults to None, so its annotation accepts None. This lets a
    default-constructed instance be re-validated from its own dump.
    """

    script_id: str | None = Field(default=None, description="The script ID")
    attempts: List[SingleAttemptResults] = Field(default_factory=list)
    average_scores: Dict[str, float] = Field(default_factory=dict)
97
+
98
+
99
class SimulationResults(BaseModel):
    """Top-level results of a simulation run.

    `finished_at` is required. `started_at` defaults to the time the instance
    is created, and `batch_id` is generated once per instance.
    """

    # Collected data
    # default_factory makes the timestamp per-instance; a plain
    # `datetime.now()` default would be evaluated once, at class definition.
    started_at: datetime = Field(default_factory=datetime.now)
    finished_at: datetime
    # Collected Results
    evaluation_summary: Dict[str, Any] | None = Field(default_factory=dict, description="Evaluation result")
    average_scores: Dict[str, Any] | None = Field(default_factory=dict, description="Average scores")
    script_results: List[AllAttemptsResults] | None = Field(default_factory=list, description="detailed results")
    # A stored field rather than a computed property, so the identifier stays
    # the same across accesses, dumps and re-validation of a dump.
    batch_id: str = Field(default_factory=lambda: str(uuid4()), description="Batch identifier")

    @computed_field
    @property
    def elapsed_time(self) -> float:
        """Seconds elapsed between `started_at` and `finished_at`."""
        return (self.finished_at - self.started_at).total_seconds()