levelapp 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of levelapp might be problematic.

@@ -343,6 +343,7 @@ class FunctionMonitor:
         category: MetricType,
         enable_timing: bool,
         track_memory: bool,
+        verbose=False
     ) -> Callable[P, T]:
         """
         Wrap function execution with timing and error handling.
@@ -352,6 +353,7 @@ class FunctionMonitor:
             name: Unique identifier for the function
             enable_timing: Enable execution time logging
             track_memory: Enable memory tracking
+            verbose: Enable verbose logging

         Returns:
             Wrapped function
@@ -402,7 +404,7 @@ class FunctionMonitor:

         self._aggregated_stats[name].update(metrics=metrics)

-        if enable_timing and metrics.duration is not None:
+        if verbose and enable_timing and metrics.duration is not None:
             log_message = f"[FunctionMonitor] Executed '{name}' in {metrics.duration:.4f}s"
             if metrics.cache_hit:
                 log_message += " (cache hit)"
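The net effect of these monitor hunks is that timing is still collected, but the per-call log line is only emitted when `verbose` is set. A purely hypothetical sketch, assuming `MonitoringAspect.monitor` forwards the same keyword arguments shown above (the argument pass-through is an assumption, not confirmed by this diff):

```python
# Hypothetical sketch only: assumes MonitoringAspect.monitor accepts and forwards
# the keyword arguments added to FunctionMonitor above.
from levelapp.aspects import MonitoringAspect, MetricType

@MonitoringAspect.monitor(
    name="scoring_step",
    category=MetricType.SCORING,
    enable_timing=True,   # durations still feed the aggregated stats
    track_memory=False,
    verbose=True,         # new in 0.1.2: also emit the per-call timing log line
)
def score(value: float) -> float:
    return value * 2
```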
@@ -44,7 +44,6 @@ class ClientRegistry:

         cls._wrap_client_methods(client_class)
         cls._clients[provider] = client_class
-        logger.info(f"[ClientRegistry] Registered client for provider: {provider}")

     @classmethod
     def _wrap_client_methods(cls, client_class: Type[BaseChatClient]) -> None:
@@ -78,7 +78,6 @@ class MetricsManager:
             ValueError: if the scorer is not a callable.
         """
         self._scorers[name] = scorer
-        logger.info(f"[MetricsManager] Registered scorer: {name}")

     def get_scorer(self, name: str) -> Callable:
         """
@@ -95,7 +94,6 @@ class MetricsManager:
         """
         try:
             scorer = self._scorers.get(name)
-            logger.info(f"[get_scorer] Retrieved scorer: {name}")
             return scorer

         except KeyError:
@@ -29,7 +29,6 @@ class EndpointConfig(BaseModel):
         bearer_token (SecretStr): The Bearer token to use.
         model_id (str): The model to use (if applicable).
         default_request_payload_template (Dict[str, Any]): The payload template to use.
-        generated_request_payload_template (Dict[str, Any]): The generated payload template from a provided file.
         variables (Dict[str, Any]): The variables to populate the payload template.

     Note:
@@ -40,11 +39,10 @@ class EndpointConfig(BaseModel):
         - bearer_token (SecretStr): The Bearer token to use.
         - model_id (str): The model to use (if applicable).
         - default_payload_template (Dict[str, Any]): The payload template to use.
-        - generated_payload_template (Dict[str, Any]): The generated payload template from a provided file.
         - variables (Dict[str, Any]): The variables to populate the payload template.

     Or manually configure the model instance by assigning the proper values to the model fields.\n
-    You can also provide the path in the .env file for the payload template (ENDPOINT_PAYLOAD_PATH)
+    You can also provide the path in the .env file for the payload template (ENDPOINT_PAYLOAD_PATH/)
     and the response template (ENDPOINT_RESPONSE_PATH) separately. The files can be either YAML or JSON only.
     """
     load_dotenv()
@@ -61,9 +59,7 @@ class EndpointConfig(BaseModel):

     # Data
     default_request_payload_template: Dict[str, Any] = Field(default_factory=dict)
-    generated_request_payload_template: Dict[str, Any] = Field(default_factory=dict)
     default_response_payload_template: Dict[str, Any] = Field(default_factory=dict)
-    generated_response_payload_template: Dict[str, Any] = Field(default_factory=dict)

     # Variables
     variables: Dict[str, Any] = Field(default_factory=dict)
@@ -88,14 +84,18 @@ class EndpointConfig(BaseModel):
     @computed_field
     @property
     def request_payload(self) -> Dict[str, Any]:
-        """Return fully prepared payload depending on template or full payload."""
-        # First, load the request payload template (either from YAML config file or from specific template)
+        """
+        Return fully prepared payload depending on template or full payload.
+
+        Returns:
+            request payload (Dict[str, Any]): Populated request payload template.
+        """
+        # First, we check if we have variables to populate the template with. If not, we return the template as is.
         if not self.variables:
             return self.default_request_payload_template

         if not self.default_request_payload_template:
-            self.load_template(template_type=TemplateType.REQUEST)
-            base_template = self.generated_request_payload_template
+            base_template = self.load_template(template_type=TemplateType.REQUEST)
         else:
             base_template = self.default_request_payload_template

@@ -118,8 +118,7 @@ class EndpointConfig(BaseModel):
             return self.default_response_payload_template

         if not self.default_response_payload_template:
-            self.load_template(template_type=TemplateType.RESPONSE)
-            base_template = self.generated_response_payload_template
+            base_template = self.load_template(template_type=TemplateType.RESPONSE)
         else:
             base_template = self.default_response_payload_template

@@ -148,12 +147,23 @@ class EndpointConfig(BaseModel):

         return _replace(obj)

+    @staticmethod
     def load_template(
-            self,
             template_type: TemplateType = TemplateType.REQUEST,
             path: str | None = None
     ) -> Dict[str, Any]:
+        """
+        Load request/response payload template from JSON/YAML file.
+
+        Args:
+            template_type (TemplateType): The type of template to load (REQUEST or RESPONSE).
+            path (str): The path of the payload template file to load.
+
+        Returns:
+            Payload template (Dict[str, Any]): Payload template.
+        """
         try:
+            # If no path was provided, we check the env. variables.
             if not path:
                 env_var = "ENDPOINT_PAYLOAD_PATH" if template_type == TemplateType.REQUEST else "ENDPOINT_RESPONSE_PATH"
                 path = os.getenv(env_var, '')
@@ -171,7 +181,6 @@ class EndpointConfig(BaseModel):
             else:
                 raise ValueError("[EndpointConfig] Unsupported file format.")

-            self.generated_request_payload_template = data
             return data

         except FileNotFoundError as e:
@@ -0,0 +1,62 @@
+from abc import ABC
+from enum import Enum
+from typing import Any, List
+
+from pydantic import BaseModel, Field
+
+
+class HttpMethod(str, Enum):
+    GET = "GET"
+    POST = "POST"
+    PUT = "PUT"
+    Patch = "PATCH"
+    DELETE = "DELETE"
+
+
+class HeaderConfig(BaseModel):
+    """Secure header configuration with environment variables support."""
+    name: str
+    value: str
+    secure: bool = False
+
+    class Config:
+        frozen = True
+
+
+class RequestSchemaConfig(BaseModel):
+    """Schema definition for request payload population."""
+    field_path: str  # JSON path-like: "data.user.id"
+    value: Any
+    value_type: str = "static"  # static, env, dynamic
+    required: bool = True
+
+
+class ResponseMappingConfig(BaseModel):
+    """Response data extraction mapping."""
+    field_path: str  # JSON path-like: "data.results[0].id"
+    extract_as: str  # Name to extract as
+    default: Any = None
+
+
+class EndpointConfig(BaseModel):
+    """Complete endpoint configuration."""
+    name: str
+    base_url: str
+    path: str
+    method: HttpMethod
+    headers: List[HeaderConfig] = Field(default_factory=list)
+    request_schema: List[RequestSchemaConfig] = Field(default_factory=list)
+    response_mapping: List[ResponseMappingConfig] = Field(default_factory=list)
+    timeout: int = 30
+    retry_count: int = 3
+    retry_backoff: float = 1.0
+
+    @classmethod
+    def validate_path(cls, v: str) -> str:
+        if not v.startswith('/'):
+            return f'/{v}'
+        return v
+
+
+class PayloadBuilder(ABC):
+    """Abstract base for payload construction strategies."""
@@ -33,3 +33,25 @@ Return ONLY a single JSON object on one line with exactly these keys:

    Do NOT include any additional text, explanations, or formatting (e.g., "JSON object:", ```json or ```, or markdown).
    """
+
+
+SUMMARIZATION_PROMPT_TEMPLATE = """
+You are reviewing evaluation justifications from LLM judges about replies generated by a virtual assistant.
+Interpret the context from the verdicts: (e.g., real-estate leasing, medical appointment scheduling, etc.).
+
+Each justification contains the judge's assessment of how well the assistant's response matched the expected reply.
+Your task is to **identify and summarize only the negative points**, such as:
+- Errors or inaccuracies
+- Misunderstandings or misinterpretations
+- Missing or incomplete information
+- Failure to meet expectations or requirements
+
+**Instructions:**
+- Return up to {max_bullets} concise bullet points.
+- Start each point with "- " and focus on clarity and relevance.
+- Avoid redundancy and prioritize actionable feedback.
+
+---
+- Judge: {judge}
+- Verdicts: {verdicts}
+"""
levelapp/core/schemas.py CHANGED
@@ -22,5 +22,3 @@ class EvaluatorType(ExtendedEnum):
     JUDGE = "JUDGE"
     REFERENCE = "REFERENCE"
     RAG = "RAG"
-
-
levelapp/core/session.py CHANGED
@@ -63,6 +63,14 @@ class StepContext:
         step_name: str,
         category: MetricType,
     ):
+        """
+        Initialize StepContext.
+
+        Args:
+            session (EvaluationSession): Evaluation session.
+            step_name (str): Step name.
+            category (MetricType): Metric type.
+        """
         self.session = session
         self.step_name = step_name
         self.category = category
@@ -88,6 +96,7 @@ class StepContext:
             category=self.category,
             enable_timing=True,
             track_memory=True,
+            verbose=self.session.verbose,
         )(self._step_wrapper)

         # Start monitoring
@@ -119,7 +128,7 @@ class StepContext:

         self.session.session_metadata.total_executions += 1

-        if self.step_meta.duration:
+        if self.session.enable_monitoring and self.step_meta.duration:
             self.session.monitor.update_procedure_duration(
                 name=self.full_step_name,
                 value=self.step_meta.duration
@@ -136,6 +145,7 @@ class EvaluationSession:
         session_name: str = "test-session",
         workflow_config: WorkflowConfig | None = None,
         enable_monitoring: bool = True,
+        verbose: bool = False
     ):
         """
         Initialize Evaluation Session.
@@ -143,12 +153,15 @@ class EvaluationSession:
         Args:
             session_name (str): Name of the session
             workflow_config (WorkflowConfig): Workflow configuration.
+            enable_monitoring (bool): Switch monitoring on. Defaults to True.
+            verbose (bool): Verbose mode. Defaults to False.
         """
         self._NAME = self.__class__.__name__

         self.session_name = session_name
         self.workflow_config = workflow_config
         self.enable_monitoring = enable_monitoring
+        self.verbose = verbose

         self.workflow: BaseWorkflow | None = None

@@ -176,7 +189,7 @@ class EvaluationSession:
         self.workflow = MainFactory.create_workflow(context=context)

         logger.info(
-            f"[{self._NAME}] Starting evaluation session: {self.session_name}, "
+            f"[{self._NAME}] Starting evaluation session: {self.session_name} - "
             f"Workflow: '{self.workflow.name}'"
         )
         return self
@@ -190,6 +203,7 @@ class EvaluationSession:

         if exc_type:
             logger.error(f"[{self._NAME}] Session ended with error: {exc_val}", exc_info=True)
+
         return False

     def step(self, step_name: str, category: MetricType = MetricType.CUSTOM) -> StepContext:
@@ -213,6 +227,19 @@ class EvaluationSession:
         self.workflow.collect_results()

     def get_stats(self) -> Dict[str, Any]:
+        if self.enable_monitoring:
+            return {
+                "session": {
+                    "name": self.session_name,
+                    "duration": precisedelta(self.session_metadata.duration, suppress=['minutes']),
+                    "start_time": self.session_metadata.started_at.isoformat(),
+                    "end_time": self.session_metadata.ended_at.isoformat(),
+                    "steps": len(self.session_metadata.steps),
+                    "errors": sum(s.error_count for s in self.session_metadata.steps.values())
+                },
+                "stats": self.monitor.get_all_stats()
+            }
+
         return {
             "session": {
                 "name": self.session_name,
@@ -222,5 +249,4 @@ class EvaluationSession:
                 "steps": len(self.session_metadata.steps),
                 "errors": sum(s.error_count for s in self.session_metadata.steps.values())
             },
-            "stats": self.monitor.get_all_stats()
         }
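Taken together, the session.py changes add a `verbose` switch that is forwarded to the function monitor and make `get_stats()` include the "stats" block only while monitoring is enabled. A rough usage sketch, assuming a valid workflow configuration file (the path and session name are placeholders):

```python
# Rough sketch of the new EvaluationSession options (paths/names are placeholders).
from levelapp.workflow import WorkflowConfig
from levelapp.core.session import EvaluationSession

config = WorkflowConfig.load(path="workflow_config.yaml")

with EvaluationSession(
    session_name="test-session-1",
    workflow_config=config,
    enable_monitoring=True,  # keeps the "stats" block in get_stats()
    verbose=True,            # new in 0.1.2: per-step timing log lines
) as session:
    session.run()
    print(session.get_stats())
```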
@@ -41,7 +41,7 @@ class JudgeEvaluationResults(BaseModel):
     label: str = Field(..., description="The label of the evaluation result")
     justification: str = Field(..., description="Short explanation of the evaluation result")
     evidence: Evidence = Field(default_factory=Evidence, description="Detailed evidence for the evaluation")
-    raw_response: Dict[str, Any] = Field(..., description="Full unprocessed API response")
+    raw_response: Dict[str, Any] = Field(..., description="Full unprocessed API response", exclude=True)
     metadata: Dict[str, Any] = Field(..., description="Metadata about the evaluation result")

     @classmethod
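The `exclude=True` added above means pydantic keeps `raw_response` on the instance but leaves it out of serialized output. A generic illustration with a deliberately simplified, hypothetical model (not the real JudgeEvaluationResults):

```python
# Generic illustration of Field(exclude=True); the model here is hypothetical.
from typing import Any, Dict
from pydantic import BaseModel, Field

class Result(BaseModel):
    label: str
    raw_response: Dict[str, Any] = Field(default_factory=dict, exclude=True)

r = Result(label="GOOD", raw_response={"id": "resp-1", "usage": {"tokens": 42}})
print(r.model_dump())  # {'label': 'GOOD'} - raw_response is omitted from dumps
```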
@@ -71,7 +71,14 @@ class JudgeEvaluationResults(BaseModel):


 class JudgeEvaluator(BaseEvaluator):
+    """LLM-as-a-judge evaluator class"""
     def __init__(self, config: "WorkflowConfig | None" = None):
+        """
+        Initialize the JudgeEvaluator.
+
+        Args:
+            config (WorkflowConfig | None): The configuration of the workflow.
+        """
         if config:
             self.config = config
             self.providers = config.evaluation.providers
@@ -206,7 +213,6 @@ class JudgeEvaluator(BaseEvaluator):
         ):
             with attempt:
                 response = await client.acall(message=prompt)
-                logger.info(f"[{provider}] Async evaluation:\n{response}\n{'---' * 10}")
                 parsed = client.parse_response(response=response)
                 return JudgeEvaluationResults.from_parsed(provider=provider, parsed=parsed, raw=response)

@@ -224,7 +230,14 @@ class JudgeEvaluator(BaseEvaluator):


 class MetadataEvaluator(BaseEvaluator):
-    def __init__(self, config: "WorkflowConfig | None"= None):
+    """Metadata evaluator class."""
+    def __init__(self, config: "WorkflowConfig | None" = None):
+        """
+        Initialize the MetadataEvaluator.
+
+        Args:
+            config (WorkflowConfig | None): The workflow configuration.
+        """
         if config:
             self.config = config
             self.metics_map = config.evaluation.metrics_map
@@ -261,7 +274,6 @@ class MetadataEvaluator(BaseEvaluator):
         self.comparator.reference_data = ref_data

         output = self.comparator.run(indexed_mode=False)
-        logger.info(f"Comparison results:\n{output}\n---")
         results: Dict[str, float] = {}

         for k, v in output.items():
@@ -1,14 +1,11 @@
 """levelapp/metrics/__init__.py"""
-import logging
-
 from typing import List, Dict, Type, Any

+from levelapp.aspects import logger
 from levelapp.core.base import BaseMetric
 from levelapp.metrics.exact import EXACT_METRICS
 from levelapp.metrics.fuzzy import FUZZY_METRICS

-logger = logging.getLogger(__name__)
-

 class MetricRegistry:
     """Registry for metric classes."""
@@ -27,7 +24,6 @@ class MetricRegistry:
             raise KeyError(f"Metric '{name}' is already registered")

         cls._metrics[name] = metric_class
-        logger.info(f"Metric '{name}' registered successfully.")

     @classmethod
     def get(cls, name: str, **kwargs: Any) -> BaseMetric:
@@ -9,7 +9,7 @@ from uuid import UUID, uuid4
 from datetime import datetime

 from typing import Dict, Any, List
-from pydantic import BaseModel, Field, computed_field
+from pydantic import BaseModel, Field, computed_field, field_validator

 from levelapp.evaluator.evaluator import JudgeEvaluationResults

@@ -64,26 +64,20 @@ class InteractionEvaluationResults(BaseModel):


 class SimulationResults(BaseModel):
-    # Initial data
-    project_id: str = Field(default_factory=uuid4, description="Project identifier")
-    user_id: str = Field(default_factory=uuid4, description="User identifier")
-    batch_id: str = Field(default_factory=uuid4, description="Batch identifier")
     # Collected data
     started_at: datetime = datetime.now()
     finished_at: datetime
     # Collected Results
     evaluation_summary: Dict[str, Any] | None = Field(default_factory=dict, description="Evaluation result")
     average_scores: Dict[str, Any] | None = Field(default_factory=dict, description="Average scores")
+    interaction_results: List[Dict[str, Any]] | None = Field(default_factory=list, description="detailed results")
+
+    @computed_field
+    @property
+    def batch_id(self) -> str:
+        return str(uuid4())

     @computed_field
     @property
     def elapsed_time(self) -> float:
         return (self.finished_at - self.started_at).total_seconds()
-
-
-class TestResults(BaseModel):
-    api_host: str = Field(..., alias="apiHost")
-    ionos_model_name: str = Field(..., alias="ionosModelName")
-    test_name: str = Field(..., alias="testName")
-    test_type: str = Field(..., alias="testType")
-    batch_details: SimulationResults | None = Field(..., alias="results")
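With the project/user/batch identifier fields dropped, `batch_id` is now generated as a computed field and `interaction_results` carries the per-script details. A small sketch of building and serializing the reworked model (all values invented, import path assumed from the RECORD below):

```python
# Sketch only - field values are invented; import path is assumed.
from datetime import datetime, timedelta
from levelapp.simulator.schemas import SimulationResults

results = SimulationResults(
    started_at=datetime.now() - timedelta(seconds=12),
    finished_at=datetime.now(),
    evaluation_summary={"openai": ["- Missed the requested date."]},
    average_scores={"openai": 0.82},
    interaction_results=[{"script_id": "demo", "average_scores": {"openai": 0.82}}],
)
# Computed fields (batch_id, elapsed_time) are included in the dump.
print(results.model_dump_json(indent=2))
```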
@@ -1,6 +1,7 @@
 """
 'simulators/service.py': Service layer to manage conversation simulation and evaluation.
 """
+import json
 import time
 import asyncio

@@ -92,6 +93,15 @@ class ConversationSimulator(BaseProcess):
         self._headers = endpoint_config.headers

     def get_evaluator(self, name: EvaluatorType) -> BaseEvaluator:
+        """
+        Retrieve an evaluator by name.
+
+        Args:
+            name (EvaluatorType): Name of evaluator.
+
+        Returns:
+            An evaluator object.
+        """
         _LOG: str = f"[{self._CLASS_NAME}][{self.get_evaluator.__name__}]"

         if name not in self.evaluators:
@@ -103,7 +113,7 @@ class ConversationSimulator(BaseProcess):
         self,
         test_batch: ScriptsBatch,
         attempts: int = 1,
-    ) -> Dict[str, Any]:
+    ) -> Any:
         """
         Run a batch test for the given batch name and details.

@@ -129,9 +139,10 @@ class ConversationSimulator(BaseProcess):
             finished_at=finished_at,
             evaluation_summary=self.verdict_summaries,
             average_scores=results.get("average_scores", {}),
+            interaction_results=results.get("results")
         )

-        return {"results": results, "status": "COMPLETE"}
+        return results.model_dump_json(indent=2)

     async def simulate_conversation(self, attempts: int = 1) -> Dict[str, Any]:
         """
@@ -171,10 +182,11 @@ class ConversationSimulator(BaseProcess):
             verdicts=verdicts, judge=judge
         )

-        return {"scripts": results, "average_scores": overall_average_scores}
+        return {"results": results, "average_scores": overall_average_scores}

     async def simulate_single_scenario(
-        self, script: ConversationScript, attempts: int = 1
+        self, script: ConversationScript,
+        attempts: int = 1
     ) -> Dict[str, Any]:
         """
         Simulate a single scenario with the given number of attempts, concurrently.
@@ -193,19 +205,18 @@ class ConversationSimulator(BaseProcess):
         all_attempts_verdicts: Dict[str, List[str]] = defaultdict(list)

         async def simulate_attempt(attempt_number: int) -> Dict[str, Any]:
-            logger.info(f"{_LOG} Running attempt: {attempt_number + 1}/{attempts}")
+            logger.info(f"{_LOG} Running attempt: {attempt_number + 1}/{attempts}\n---")
             start_time = time.time()

             collected_scores: Dict[str, List[Any]] = defaultdict(list)
             collected_verdicts: Dict[str, List[str]] = defaultdict(list)

-            initial_interaction_results = await self.simulate_interactions(
+            interaction_results = await self.simulate_interactions(
                 script=script,
                 evaluation_verdicts=collected_verdicts,
                 collected_scores=collected_scores,
             )

-            logger.info(f"{_LOG} collected_scores: {collected_scores}\n---")
             single_attempt_scores = calculate_average_scores(collected_scores)

             for target, scores in single_attempt_scores.items():
@@ -225,7 +236,7 @@ class ConversationSimulator(BaseProcess):
                 "attempt": attempt_number + 1,
                 "script_id": script.id,
                 "total_duration": elapsed_time,
-                "interaction_results": initial_interaction_results,
+                "interaction_results": interaction_results,
                 "evaluation_verdicts": collected_verdicts,
                 "average_scores": single_attempt_scores,
             }
@@ -238,10 +249,6 @@ class ConversationSimulator(BaseProcess):
         for judge_, verdicts_ in all_attempts_verdicts.items():
             self.evaluation_verdicts[judge_].extend(verdicts_)

-        logger.info(
-            f"{_LOG} average scores:\n{average_scores}\n---"
-        )
-
         return {
             "script_id": script.id,
             "attempts": attempt_results,
@@ -324,8 +331,6 @@ class ConversationSimulator(BaseProcess):
             reference_guardrail=reference_guardrail_flag,
         )

-        logger.info(f"{_LOG} Evaluation results:\n{evaluation_results.model_dump()}\n")
-
         self.store_evaluation_results(
             results=evaluation_results,
             evaluation_verdicts=evaluation_verdicts,
@@ -333,9 +338,7 @@ class ConversationSimulator(BaseProcess):
         )

         elapsed_time = time.time() - start_time
-        logger.info(
-            f"{_LOG} Interaction simulation complete in {elapsed_time:.2f} seconds.\n---"
-        )
+        logger.info(f"{_LOG} Interaction simulation complete in {elapsed_time:.2f} seconds.\n---")

         result = {
             "user_message": user_message,
@@ -494,7 +497,7 @@ class ConversationSimulator(BaseProcess):
             collected_scores (Dict[str, List[Any]]): The collected scores.
         """
         for provider in results.judge_evaluations.keys():
-            evaluation_verdicts[f"{provider}_verdicts_summary"].append(
+            evaluation_verdicts[f"{provider}"].append(
                 results.judge_evaluations.get(provider, "").justification
             )

@@ -1,17 +1,18 @@
 """
 'simulators/aspects.py': Utility functions for handling VLA interactions and requests.
 """
+import ast
 import json
-
 import httpx
-import arrow

 from uuid import UUID
-from typing import Dict, Any, Optional, List, Union
+from string import Template
+from typing import Any, Dict, List, Union

-from openai import OpenAI
 from pydantic import ValidationError

+from levelapp.clients import ClientRegistry
+from levelapp.config.prompts import SUMMARIZATION_PROMPT_TEMPLATE
 from levelapp.simulator.schemas import InteractionResults
 from levelapp.aspects import MonitoringAspect, MetricType, logger

@@ -48,7 +49,14 @@ def extract_interaction_details(
         missing_keys = required_keys - response_dict.keys()
         logger.warning(f"[extract_interaction_details] Missing data: {missing_keys}]")

-        return InteractionResults.model_validate(response_dict)
+        output = {}
+        for k, v in template.items():
+            output[k] = Template(v).safe_substitute(response_dict)
+
+        raw_value = output.get("generated_metadata", {})
+        output["generated_metadata"] = ast.literal_eval(raw_value) if isinstance(raw_value, str) else raw_value
+
+        return InteractionResults.model_validate(output)

     except json.JSONDecodeError as e:
         logger.error(f"[extract_interaction_details] Failed to extract details:\n{e}")
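The new extraction path substitutes `${...}` placeholders from the response template and then literal-evals the stringified metadata; the commented-out `__main__` block kept at the end of this file shows the intended inputs. The substitution step in isolation (inputs invented, mirroring that example):

```python
# Sketch of the substitution logic in isolation; inputs are invented and mirror
# the commented-out example at the bottom of this file.
import ast
from string import Template

template = {"generated_reply": "${agent_reply}", "generated_metadata": "${generated_metadata}"}
response_dict = {
    "agent_reply": "I'd be happy to help you book something for 10 AM.",
    "generated_metadata": {"appointment_type": "Cardiology", "time": "10 AM"},
}

output = {k: Template(v).safe_substitute(response_dict) for k, v in template.items()}

# safe_substitute stringifies the metadata dict, so it is parsed back into a dict.
raw_value = output.get("generated_metadata", {})
output["generated_metadata"] = ast.literal_eval(raw_value) if isinstance(raw_value, str) else raw_value
print(output)
```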
@@ -64,7 +72,7 @@ async def async_interaction_request(
     url: str,
     headers: Dict[str, str],
     payload: Dict[str, Any],
-) -> Optional[httpx.Response]:
+) -> httpx.Response | None:
     """
     Perform an asynchronous interaction request.

@@ -74,7 +82,7 @@ async def async_interaction_request(
         payload (Dict[str, Any]): The payload to send in the request.

     Returns:
-        Optional[httpx.Response]: The response from the interaction request, or None if an error occurred.
+        httpx.Response: The response from the interaction request, or None if an error occurred.
     """
     try:
         async with httpx.AsyncClient(timeout=180) as client:
@@ -92,42 +100,6 @@ async def async_interaction_request(
         return None


-def parse_date_value(raw_date_value: Optional[str], default_date_value: Optional[str] = "") -> str:
-    """
-    Cleans and parses a dehumanized relative date string to ISO format.
-
-    Args:
-        raw_date_value (Optional[str]): The raw date value to parse.
-        default_date_value (Optional[str]): The default value to return if parsing fails. Defaults to an empty string.
-
-    Returns:
-        str: The parsed date in ISO format, or the default value if parsing fails.
-    """
-    if not raw_date_value:
-        logger.info(f"[parse_date_value] No raw value provided. returning default: '{default_date_value}'")
-        return default_date_value
-
-    clean = raw_date_value.replace("{{", "").replace("}}", "").replace("_", " ").strip()
-    clean += 's' if not clean.endswith('s') else clean
-
-    try:
-        arw = arrow.utcnow()
-        parsed_date = arw.dehumanize(clean).utcnow().format('YYYY-MM-DD')
-        return parsed_date
-
-    except arrow.parser.ParserError as e:
-        logger.error(f"[parse_date_value] Failed to parse date: '{clean}'\nParserError: {str(e)}", exc_info=True)
-        return default_date_value
-
-    except ValueError as e:
-        logger.error(f"[parse_date_value] Invalid date value: '{clean}'\nValueError: {str(e)}", exc_info=True)
-        return default_date_value
-
-    except Exception as e:
-        logger.error(f"[parse_date_value] Unexpected error.\nException: {str(e)}", exc_info=True)
-        return default_date_value
-
-
 @MonitoringAspect.monitor(
     name="average_calc",
     category=MetricType.SCORING,
@@ -157,45 +129,35 @@ def calculate_average_scores(scores: Dict[str, Union[List[float], float]]) -> Di


 @MonitoringAspect.monitor(name="summarization", category=MetricType.API_CALL)
-def summarize_verdicts(verdicts: List[str], judge: str, max_bullets: int = 5) -> List[str]:
-    """
-    Summarize the justifications for each judge.
-
-    Args:
-        verdicts (List[str]): A list of justifications.
-        judge (str): The judge or evaluator (provider) name for context.
-        max_bullets (int): The maximum number of bullets allowed per judge.
-
-    Returns:
-        List[str]: The summarized justifications.
-    """
-    if not verdicts:
-        return []
-
-    prompt = f"""
-    You are reviewing evaluation justifications from LL judges about replies generated by a virtual leasing agent.\n
-    Each justification contains the judge's assessment of how well the agent's response matched the expected reply.\n
-    Your task is to identify and summarize only the **negative points**, such as errors, misunderstandings,
-    missing information, or failure to meet expectations.\n
-    Return up to {max_bullets} bullet points. Be concise and start each point with '- '\n\n
-    ---
-    - Judge: {judge}
-    - Justifications:\n{chr(10).join(verdicts)}\n
-    """
-
-    client = OpenAI()
+def summarize_verdicts(
+    verdicts: List[str],
+    judge: str,
+    max_bullets: int = 5
+) -> List[str]:
+    client_registry = ClientRegistry()
+    client = client_registry.get(provider=judge)

     try:
-        result = client.chat.completions.create(
-            model="gpt-4o-mini",
-            temperature=0,
-            messages=[{"role": "user", "content": prompt}]
-        ).choices[0].message.content
+        verdicts = chr(10).join(verdicts)
+        prompt = SUMMARIZATION_PROMPT_TEMPLATE.format(max_bullets=max_bullets, judge=judge, verdicts=verdicts)
+        response = client.call(message=prompt)
+        parsed = client.parse_response(response=response)
+        striped = parsed.get("output", "").strip("")
+        bullet_points = [point.strip() for point in striped.split("- ") if point.strip()]

-        bullet_points = [point.strip() for point in result.split('- ') if point.strip()]
-
-        return bullet_points
+        return bullet_points[:max_bullets]

     except Exception as e:
         logger.error(f"[summarize_justifications] Error during summarization: {str(e)}", exc_info=True)
         return []
+
+
+# if __name__ == '__main__':
+#     template = {'generated_reply': '${agent_reply}', 'generated_metadata': '${generated_metadata}'}
+#     response_dict = {
+#         'agent_reply': "I'd be happy to help you book something for 10 AM.",
+#         'generated_metadata': {'appointment_type': 'Cardiology', 'date': 'next Monday', 'time': '10 AM'}
+#     }
+#
+#     result = extract_interaction_details(response_dict, template)
+#     print(f"result: {result.model_dump()}")
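`summarize_verdicts` now routes through whichever registered chat client matches the judge name instead of calling OpenAI directly. A rough call sketch, assuming the function is importable from levelapp.simulator.utils (per the RECORD below) and that a client for the named provider is registered and configured:

```python
# Rough sketch only - provider name and verdict strings are placeholders.
from levelapp.simulator.utils import summarize_verdicts

bullets = summarize_verdicts(
    verdicts=[
        "The assistant confirmed the wrong appointment time.",
        "The reply omitted the requested confirmation number.",
    ],
    judge="openai",  # must match a provider registered in ClientRegistry
    max_bullets=3,
)
print(bullets)
```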
levelapp/workflow/base.py CHANGED
@@ -4,7 +4,7 @@ from abc import ABC, abstractmethod
 from pydantic import ValidationError
 from functools import partial
 from pathlib import Path
-from typing import Any
+from typing import Any, Dict

 from levelapp.core.base import BaseProcess
 from levelapp.simulator.schemas import ScriptsBatch
@@ -60,15 +60,32 @@ class BaseWorkflow(ABC):
         self._results = await loop.run_in_executor(None, func, None)

     def collect_results(self) -> Any:
-        """Return unified results structure."""
+        """
+        Return unified results structure.
+
+        Returns:
+            The simulation results.
+        """
         return self._results

     @abstractmethod
     def _setup_process(self, context: WorkflowContext) -> BaseProcess:
+        """
+        Abstract method for setting up the configured process.
+
+        Args:
+            context (WorkflowContext): The workflow context.
+        """
         raise NotImplementedError

     @abstractmethod
     def _load_input_data(self, context: WorkflowContext) -> Any:
+        """
+        Abstract method for loading reference data.
+
+        Args:
+            context (WorkflowContext): The workflow context.
+        """
         raise NotImplementedError


@@ -77,6 +94,15 @@ class SimulatorWorkflow(BaseWorkflow):
         super().__init__(name="ConversationSimulator", context=context)

     def _setup_process(self, context: WorkflowContext) -> BaseProcess:
+        """
+        Concrete implementation for setting up the simulation workflow.
+
+        Args:
+            context (WorkflowContext): The workflow context for the simulation workflow.
+
+        Returns:
+            ConversationSimulator instance.
+        """
         simulator = ConversationSimulator()
         simulator.setup(
             repository=context.repository,
@@ -86,7 +112,16 @@ class SimulatorWorkflow(BaseWorkflow):
         )
         return simulator

-    def _load_input_data(self, context: WorkflowContext) -> Any:
+    def _load_input_data(self, context: WorkflowContext) -> Dict[str, Any]:
+        """
+        Concrete implementation for loading the reference data.
+
+        Args:
+            context (WorkflowContext): The workflow context for the simulation workflow.
+
+        Returns:
+            Dict[str, Any]: The reference data.
+        """
         loader = DataLoader()
         if "reference_data" in context.inputs:
             data_config = context.inputs["reference_data"]
@@ -2,6 +2,7 @@
 from typing import List, Dict, Any, Optional
 from pydantic import BaseModel, Field

+from levelapp.aspects import logger
 from levelapp.config.endpoint import EndpointConfig
 from levelapp.core.schemas import WorkflowType, RepositoryType, EvaluatorType

@@ -47,19 +48,45 @@ class WorkflowConfig(BaseModel):
         extra = "allow"

     @classmethod
-    def load(cls, path: Optional[str] = None) -> "WorkflowConfig":
-        """Load workflow configuration from a YAML/JSON file."""
+    def load(cls, path: str | None = None) -> "WorkflowConfig":
+        """
+        Load workflow configuration from a YAML/JSON file.
+
+        Args:
+            path (str): YAML/JSON configuration file path.
+
+        Returns:
+            WorkflowConfig: An instance of WorkflowConfig.
+        """
         from levelapp.aspects.loader import DataLoader

         loader = DataLoader()
         config_dict = loader.load_raw_data(path=path)
+        logger.info(f"[{cls.__name__}] Workflow configuration loaded from '{path}' file content")
         return cls.model_validate(config_dict)

     @classmethod
     def from_dict(cls, content: Dict[str, Any]) -> "WorkflowConfig":
-        """Load workflow configuration from an in-memory dict."""
+        """
+        Load workflow configuration from an in-memory dict.
+
+        Args:
+            content (dict): Workflow configuration content.
+
+        Returns:
+            WorkflowConfig: An instance of WorkflowConfig.
+        """
+        logger.info(f"[{cls.__name__}] Workflow configuration loaded from provided content")
         return cls.model_validate(content)

     def set_reference_data(self, content: Dict[str, Any]) -> None:
-        """Load referer data from an in-memory dict."""
+        """
+        Load referer data from an in-memory dict.
+
+        Args:
+            content (dict): Workflow configuration content.
+
+        """
         self.reference_data.data = content
+        logger.info(f"[{self.__class__.__name__}] Reference data loaded from provided content")
+
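Both loading paths now log where the configuration came from; the in-memory path pairs with `set_reference_data`. A short sketch, with the dict contents abbreviated from the README example shown later in this diff and the reference-data shape invented:

```python
# Sketch of the in-memory configuration path (contents abbreviated/invented).
from levelapp.workflow import WorkflowConfig

config_dict = {
    "process": {"project_name": "test-project", "workflow_type": "SIMULATOR"},
    "evaluation": {"evaluators": ["JUDGE"], "providers": ["openai"]},
    "reference_data": {"path": "", "data": {}},
    "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
    "repository": {"type": "FIRESTORE", "source": "IN_MEMORY"},
}

config = WorkflowConfig.from_dict(config_dict)   # logs: "... loaded from provided content"
config.set_reference_data({"scripts": []})       # logs: "Reference data loaded ..."
```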
@@ -51,7 +51,6 @@ class WorkflowContextBuilder:
             inputs["reference_data"] = self.config.reference_data.data
         else:
             inputs["reference_data_path"] = self.config.reference_data.path
-            print(f"[WorkflowContextBuilder] reference data path: {inputs['reference_data_path']}")

         return WorkflowContext(
             config=self.config,
@@ -8,7 +8,6 @@ from levelapp.workflow.runtime import WorkflowContext

 class MainFactory:
     """Central factory for workflows."""
-
     _workflow_map: Dict[WorkflowType, Callable[[WorkflowContext], BaseWorkflow]] = {
         WorkflowType.SIMULATOR: lambda ctx: SimulatorWorkflow(ctx),
         WorkflowType.COMPARATOR: lambda ctx: ComparatorWorkflow(ctx),
@@ -16,7 +15,15 @@ class MainFactory:

     @classmethod
     def create_workflow(cls, context: WorkflowContext) -> BaseWorkflow:
-        """Create workflow using the given runtime context."""
+        """
+        Create workflow using the given runtime context.
+
+        Args:
+            context (WorkflowContext): the provided workflow context.
+
+        Returns:
+            BaseWorkflow: the built workflow instance from the provided context.
+        """
         wf_type = context.config.process.workflow_type
         builder = cls._workflow_map.get(wf_type)
         if not builder:
@@ -25,5 +32,11 @@ class MainFactory:

     @classmethod
     def register_workflow(cls, wf_type: WorkflowType, builder: Callable[[WorkflowContext], BaseWorkflow]) -> None:
-        """Register a new workflow implementation."""
+        """
+        Register a new workflow implementation.
+
+        Args:
+            wf_type (WorkflowType): the workflow type.
+            builder (Callable[[WorkflowContext], BaseWorkflow]): the workflow builder.
+        """
         cls._workflow_map[wf_type] = builder
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: levelapp
-Version: 0.1.1
+Version: 0.1.2
 Summary: LevelApp is an evaluation framework for AI/LLM-based software application. [Powered by Norma]
 Project-URL: Homepage, https://github.com/levelapp-org
 Project-URL: Repository, https://github.com/levelapp-org/levelapp-framework
@@ -17,14 +17,12 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Testing
 Requires-Python: >=3.12
-Requires-Dist: arrow>=1.3.0
 Requires-Dist: google-api-core>=2.25.1
 Requires-Dist: google-auth>=2.40.3
 Requires-Dist: google-cloud-firestore>=2.21.0
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: humanize>=4.13.0
 Requires-Dist: numpy>=2.3.2
-Requires-Dist: openai>=1.99.9
 Requires-Dist: pandas-stubs==2.3.0.250703
 Requires-Dist: pandas>=2.3.1
 Requires-Dist: pydantic>=2.11.7
@@ -133,7 +131,7 @@ endpoint:
     generated_metadata: "${generated_metadata}"

 repository:
-  type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM, MONGODB.
+  type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
   project_id: "(default)"
   database_name: ""
 ```
@@ -220,14 +218,14 @@ To run an evaluation:

 ```python
 if __name__ == "__main__":
-    from levelapp.workflow.schemas import WorkflowConfig
+    from levelapp.workflow import WorkflowConfig
     from levelapp.core.session import EvaluationSession

     # Load configuration from YAML
     config = WorkflowConfig.load(path="../data/workflow_config.yaml")

-    # Run evaluation session
-    with EvaluationSession(session_name="test-session-1", workflow_config=config) as session:
+    # Run evaluation session (You can enable/disable the monitoring aspect)
+    with EvaluationSession(session_name="test-session-1", workflow_config=config, enable_monitoring=False) as session:
         session.run()
         results = session.workflow.collect_results()
         print("Results:", results)
@@ -243,14 +241,13 @@ if __name__ == "__main__":
     from levelapp.workflow import WorkflowConfig
     from levelapp.core.session import EvaluationSession

-    # Firestore -> retrieve endpoint config -> data => config_dict
-
+
     config_dict = {
         "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
-        "evaluation": {"evaluators": ["JUDGE"], "providers": ["openai", "ionos"]},
+        "evaluation": {"evaluators": ["JUDGE", "REFERENCE"], "providers": ["openai", "ionos"], "metrics_map": {"field_1": "EXACT"}},
         "reference_data": {"path": "", "data": {}},
         "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
-        "repository": {"type": "FIRESTORE", "source": "IN_MEMORY", "metrics_map": {"field_1": "EXACT"}},
+        "repository": {"type": "FIRESTORE", "source": "IN_MEMORY"},
     }

     content = {
@@ -2,9 +2,9 @@ levelapp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 levelapp/aspects/__init__.py,sha256=_OaPcjTWBizqcUdDVj5aYue7lG9ytjQGLhPvReriKnU,326
 levelapp/aspects/loader.py,sha256=xWpcWtS25zbVhZ0UnIJEcQA9klajKk10TLK4j1IStH0,9543
 levelapp/aspects/logger.py,sha256=MJ9HphyHYkTE5-ajA_WuMUTM0qQzd0WIP243vF-pj3M,1698
-levelapp/aspects/monitor.py,sha256=lXGgxxeKZNkL5XbC_0ybES75gaRA9SuspHLHAEDbMR8,21981
+levelapp/aspects/monitor.py,sha256=KREhG_KH4f7okyheexaeKykwhzzQsFl1USC-k9YPXfQ,22065
 levelapp/aspects/sanitizer.py,sha256=zUqgb76tXJ8UUYtHp0Rz7q9PZjAHpSpHPPFfGTjjQNg,5229
-levelapp/clients/__init__.py,sha256=bbSFkYIODxEFatET4CFsKGC4m7Ng_tUkTGNcVBMa9h0,4169
+levelapp/clients/__init__.py,sha256=of3Zdkag634COXH_ca4hxXkERT8X44QS0IgfWu2yWqY,4084
 levelapp/clients/anthropic.py,sha256=Bxp-HffcIPLwM9BLcTR7n-D8ZXYVWCmbr2oH33fKV04,4030
 levelapp/clients/ionos.py,sha256=GFkLSeu8epFZV44GbNO3h1fRCKcfxscHMTFY0kPfh3o,4267
 levelapp/clients/mistral.py,sha256=e1NRvP9qN7O2zWAzBbgdQmmUDHQfCRLtVKDJCrh0DNA,3777
@@ -13,18 +13,19 @@ levelapp/comparator/__init__.py,sha256=ynmc0mrx-JbcCqLH-z4hOVezqGocDbDQGqgbhWy2x
 levelapp/comparator/comparator.py,sha256=yk0FWREnWKhIbXlsYpieqPJPqrlWXzyBMjVSznGqKY8,8183
 levelapp/comparator/extractor.py,sha256=vJ9iEoWAtXo2r9r7X72uUQPKW3UZE9Kx3uIjCufEp9k,3910
 levelapp/comparator/schemas.py,sha256=lUAQzEyStidt2ePQgV2zq-An5MLBrVSw6t8fB0FQKJs,1803
-levelapp/comparator/scorer.py,sha256=x5tU28SQekCNXmtK6wxr9ht7MZkqI78eYmZA6srkp4o,9167
+levelapp/comparator/scorer.py,sha256=LBRy8H11rXulSa-k40BcycPcMAHgdUm13qS7ibWHq6I,9032
 levelapp/comparator/utils.py,sha256=Eu48nDrNzFr0lwAJJS0aNhKsAWQ72syTEWYMNYfg764,4331
 levelapp/config/__init__.py,sha256=9oaajE5zW-OVWOszUzMAG6nHDSbLQWa3KT6bVoSvzRA,137
-levelapp/config/endpoint.py,sha256=ll34rZ0KRmUwI81EWJ3HX9i6pziq2YrQb84kv4ErymI,7649
-levelapp/config/prompts.py,sha256=crjOk01weLz5_IdF6dDZWPfSmiKNL8SgnbThyf4Jz2o,1345
+levelapp/config/endpoint.py,sha256=B-uIEKF-0_Y6Vo8MZ8eoCZocRkghijrdpwT3zq0FDLk,7647
+levelapp/config/endpoint_.py,sha256=-abrIYKbFPLxTqNst-zbCI4MpMCmCMe9VZ6O8OwNRiE,1629
+levelapp/config/prompts.py,sha256=NXOKRp5l1VQ9LO0pUojVH6TDJhWyZImsAvZEz2QiD9k,2206
 levelapp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 levelapp/core/base.py,sha256=oh4OkKgwGxmw_jgjX6wrBoK0KPc1JvCMZfbZP_mGmIg,12453
-levelapp/core/schemas.py,sha256=UrUnU6h90uqS1LpcFqmMEpgWJ3772ZD5QBIytczmAbE,469
-levelapp/core/session.py,sha256=6bP6s3iWxofWL9LT4qv90VGOntAIa6PBJ_EaWZT0Ur4,7918
+levelapp/core/schemas.py,sha256=E47d93MMOj4eRYZIqUyLBiE5Ye7WgwkOJPOWQ6swRmo,465
+levelapp/core/session.py,sha256=6utDbLdg6DjwHL5dP-4wGe4_f7gFgEukuNNeOnbCbtA,9035
 levelapp/evaluator/__init__.py,sha256=K-P75Q1FXXLCNqH1wyhT9sf4y2R9a1qR5449AXEsY1k,109
-levelapp/evaluator/evaluator.py,sha256=SSveWDIXVg9CTLqexAZJSRpR_wtd5f1bD_s5dG5HJyc,10544
-levelapp/metrics/__init__.py,sha256=1y4gDLOu2Jz4QVIgPH-v9YMgaWOFr263tYLUTiFJ-fc,1965
+levelapp/evaluator/evaluator.py,sha256=JCRgQps9GKlJBDYw9xzVrC2_aGy0GhGAJ0ZkSC_IWWA,10806
+levelapp/metrics/__init__.py,sha256=x8iTaeDezJyQ9-NFe8GGvzwIBhyAJHWSRfBE3JRX-PE,1878
 levelapp/metrics/embedding.py,sha256=wvlT8Q5DjDT6GrAIFtc5aFbA_80hDLUXMP4RbSpSwHE,115
 levelapp/metrics/exact.py,sha256=Kb13nD2OVLrl3iYHaXrxDfrxDuhW0SMVvLAEXPaJtlY,6235
 levelapp/metrics/fuzzy.py,sha256=Rg8ashzMxtQwKO-z_LLzdj2PDIRqL4CBw6PGRf9IBrI,2598
@@ -33,17 +34,17 @@ levelapp/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 levelapp/repository/__init__.py,sha256=hNmFRZ7kKJN1mMlOHeW9xf0j9Q7gqTXYJ3hMCzk9to4,79
 levelapp/repository/firestore.py,sha256=K9JgxsNCelAKtzTDv19c1dHRlitMeRzo7H3caTlKuF8,10369
 levelapp/simulator/__init__.py,sha256=8Dz8g7rbpBZX3WoknVmMVoWm_VT72ZL9BABOF1xFpqs,83
-levelapp/simulator/schemas.py,sha256=eEFNNWiAJ8FAIObWcFWF1pL9LmjWwz_2Y-yfY3pHESc,4061
-levelapp/simulator/simulator.py,sha256=RKzpV-yDmtugJ3ScJTTA9MwxTdD-oEkwRZLm8N_scjg,19972
-levelapp/simulator/utils.py,sha256=qx0JdV1ZDQdTRVKa9xfq278ASrE44GBXSnJZJuhICqo,7365
+levelapp/simulator/schemas.py,sha256=YGprtuRZ4m33WBD35xj1Ib5EbMTdDCOp-wCykf-Iz-4,3700
+levelapp/simulator/simulator.py,sha256=ytgjUE9G8z2oW6rEzSdVkyncPiJcZCLWyvLR0cI5rk8,19895
+levelapp/simulator/utils.py,sha256=d1O4Q4Yl1lAAJWLJDiwNjwt0hD9bGlCan4a2G21E7yw,5930
 levelapp/workflow/__init__.py,sha256=27b2obG7ObhR43yd2uH-R0koRB7-DG8Emnvrq8EjsTA,193
-levelapp/workflow/base.py,sha256=t-vJzwv_OJ9W_pORySJwZq9IENGbWAF3-9-7ozaKDPk,4637
-levelapp/workflow/config.py,sha256=ClQaKSWxj7rFcOEQ4budmgOqMBskg6wAibf_gzqUf1o,2142
-levelapp/workflow/context.py,sha256=DzyZEb8WHug6vWfzf7BIjZAwtmv43HqgbaB20Pw3eWo,2660
-levelapp/workflow/factory.py,sha256=PZHp3AVt61Eop3HwGQDfbO0ju5k7rvNDHKy09eywMTQ,1245
+levelapp/workflow/base.py,sha256=1A_xKSBOmVjfMbRBcNhDK6G17SEjqRIm-XjMw45IPC4,5596
+levelapp/workflow/config.py,sha256=MlHt1PsXD09aukB93fvKTew0D8WD4_jdnO93Nn6b2U0,2923
+levelapp/workflow/context.py,sha256=gjAZXHEdlsXqWY6DbXOfKXNbxQbahRPSnNzyWDqryPU,2559
+levelapp/workflow/factory.py,sha256=z1ttJmI59sU9HgOvPo3ixUJ_oPv838XgehfuOorlTt8,1634
 levelapp/workflow/registration.py,sha256=VHUHjLHXad5kjcKukaEOIf7hBZ09bT3HAzVmIT08aLo,359
 levelapp/workflow/runtime.py,sha256=cFyXNWXSuURKbrMDHdkTcjeItM9wHP-5DPljntwYL5g,686
-levelapp-0.1.1.dist-info/METADATA,sha256=ozbAgnWY4gl149zqzPgYS7-qkKGutJFb9qL0CoYHbh0,12500
-levelapp-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-levelapp-0.1.1.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-levelapp-0.1.1.dist-info/RECORD,,
+levelapp-0.1.2.dist-info/METADATA,sha256=d0ZoS0BTpHpq65Q1KLeqlN12XQiCAyJ-zPP17T22D6o,12446
+levelapp-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+levelapp-0.1.2.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+levelapp-0.1.2.dist-info/RECORD,,