levelapp 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

levelapp/workflow/base.py CHANGED

@@ -4,13 +4,13 @@ from abc import ABC, abstractmethod
 from pydantic import ValidationError
 from functools import partial
 from pathlib import Path
-from typing import Any
+from typing import Any, Dict
 
 from levelapp.core.base import BaseProcess
 from levelapp.simulator.schemas import ScriptsBatch
 from levelapp.simulator.simulator import ConversationSimulator
+from levelapp.workflow.runtime import WorkflowContext
 from levelapp.aspects.loader import DataLoader
-from levelapp.workflow.schemas import WorkflowContext
 
 
 class BaseWorkflow(ABC):
@@ -28,6 +28,7 @@ class BaseWorkflow(ABC):
         """Validate and initialize workflow-specific settings."""
         if self._initialized:
             return
+
         self.process = self._setup_process(context=self.context)
         self._initialized = True
 
@@ -56,18 +57,35 @@ class BaseWorkflow(ABC):
         else:
             loop = asyncio.get_running_loop()
             func = partial(self.process.run, **self._input_data)
-            self._results = await loop.run_in_executor(None, func)
+            self._results = await loop.run_in_executor(None, func, None)
 
     def collect_results(self) -> Any:
-        """Return unified results structure."""
+        """
+        Return unified results structure.
+
+        Returns:
+            The simulation results.
+        """
         return self._results
 
     @abstractmethod
     def _setup_process(self, context: WorkflowContext) -> BaseProcess:
+        """
+        Abstract method for setting up the configured process.
+
+        Args:
+            context (WorkflowContext): The workflow context.
+        """
        raise NotImplementedError
 
     @abstractmethod
     def _load_input_data(self, context: WorkflowContext) -> Any:
+        """
+        Abstract method for loading reference data.
+
+        Args:
+            context (WorkflowContext): The workflow context.
+        """
        raise NotImplementedError
 
 
@@ -76,38 +94,67 @@ class SimulatorWorkflow(BaseWorkflow):
         super().__init__(name="ConversationSimulator", context=context)
 
     def _setup_process(self, context: WorkflowContext) -> BaseProcess:
+        """
+        Concrete implementation for setting up the simulation workflow.
+
+        Args:
+            context (WorkflowContext): The workflow context for the simulation workflow.
+
+        Returns:
+            ConversationSimulator instance.
+        """
         simulator = ConversationSimulator()
         simulator.setup(
             repository=context.repository,
             evaluators=context.evaluators,
+            providers=context.providers,
             endpoint_config=context.endpoint_config,
         )
         return simulator
 
-    def _load_input_data(self, context: WorkflowContext) -> Any:
+    def _load_input_data(self, context: WorkflowContext) -> Dict[str, Any]:
+        """
+        Concrete implementation for loading the reference data.
+
+        Args:
+            context (WorkflowContext): The workflow context for the simulation workflow.
+
+        Returns:
+            Dict[str, Any]: The reference data.
+        """
         loader = DataLoader()
-        reference_data_path = context.inputs.get("reference_data_path", "no-path-provided")
-        file_path = Path(reference_data_path)
+        if "reference_data" in context.inputs:
+            data_config = context.inputs["reference_data"]
+        else:
+            reference_data_path = context.inputs.get("reference_data_path", "no-path-provided")
+
+            if not reference_data_path:
+                raise RuntimeError(f"[{self.name}] No reference data available.")
+
+            file_path = Path(reference_data_path)
 
-        if not file_path.exists():
-            raise FileNotFoundError(f"[{self.name}] Reference data file not found.")
+            if not file_path.exists():
+                raise FileNotFoundError(f"[{self.name}] Reference data file not found.")
+
+            data_config = loader.load_raw_data(path=reference_data_path)
 
-        evaluation_params = context.inputs.get("evaluation_params", {})
-        data_config = loader.load_raw_data(path=reference_data_path)
         try:
             scripts_batch = ScriptsBatch.model_validate(data_config)
+
         except ValidationError as e:
             raise RuntimeError(f"[{self.name}] Validation error: {e}")
 
-        return {"test_batch": scripts_batch, "attempts": evaluation_params.get("attempts", 1)}
+        attempts = context.config.process.evaluation_params.get("attempts", 1)
+
+        return {"test_batch": scripts_batch, "attempts": attempts}
 
 
 class ComparatorWorkflow(BaseWorkflow):
+    def __init__(self, context: WorkflowContext) -> None:
+        super().__init__(name="MetadataComparator", context=context)
+
     def _setup_process(self, context: WorkflowContext) -> BaseProcess:
-        pass
+        raise NotImplementedError
 
     def _load_input_data(self, context: WorkflowContext) -> Any:
-        pass
-
-    def __init__(self, context: WorkflowContext) -> None:
-        super().__init__(name="MetadataComparator", context=context)
+        raise NotImplementedError
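
For orientation, the abstract-method contract above means any concrete workflow supplies `_setup_process` and `_load_input_data`. A minimal sketch of a hypothetical subclass follows; `EchoProcess` and `EchoWorkflow` are illustrative, not part of the package, and the sketch assumes `BaseProcess`'s contract is a `run(**kwargs)` hook, as the executor call above suggests:

```python
from typing import Any, Dict

from levelapp.core.base import BaseProcess
from levelapp.workflow.base import BaseWorkflow
from levelapp.workflow.runtime import WorkflowContext


class EchoProcess(BaseProcess):
    """Hypothetical process; assumes BaseProcess only requires run(**kwargs)."""

    def run(self, **kwargs: Any) -> Any:
        return kwargs  # echo the inputs back as "results"


class EchoWorkflow(BaseWorkflow):
    """Hypothetical workflow showing the two hooks a subclass must implement."""

    def __init__(self, context: WorkflowContext) -> None:
        super().__init__(name="EchoWorkflow", context=context)

    def _setup_process(self, context: WorkflowContext) -> BaseProcess:
        # Build and configure the process that the workflow will execute.
        return EchoProcess()

    def _load_input_data(self, context: WorkflowContext) -> Dict[str, Any]:
        # Keyword arguments for process.run, taken from the runtime context.
        return {"test_batch": context.inputs.get("reference_data", {}), "attempts": 1}
```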

levelapp/workflow/config.py ADDED

@@ -0,0 +1,92 @@
+"""levelapp/workflow/config.py: Contains modular workflow configuration components."""
+from typing import List, Dict, Any, Optional
+from pydantic import BaseModel, Field
+
+from levelapp.aspects import logger
+from levelapp.config.endpoint import EndpointConfig
+from levelapp.core.schemas import WorkflowType, RepositoryType, EvaluatorType
+
+
+class ProcessConfig(BaseModel):
+    project_name: str
+    workflow_type: WorkflowType
+    evaluation_params: Dict[str, Any] = Field(default_factory=dict)
+
+
+class EvaluationConfig(BaseModel):
+    evaluators: List[EvaluatorType]
+    providers: List[str] = Field(default_factory=list)
+    metrics_map: Dict[str, str] | None = Field(default_factory=dict)
+
+
+class ReferenceDataConfig(BaseModel):
+    path: str | None
+    data: Dict[str, Any] | None = Field(default_factory=dict)
+
+
+class RepositoryConfig(BaseModel):
+    type: RepositoryType | None = None
+    project_id: str | None = None
+    database_name: str = Field(default="(default)")
+
+    class Config:
+        extra = "allow"
+
+
+class WorkflowConfig(BaseModel):
+    """
+    Static workflow configuration. Maps directly to YAML sections.
+    Supports both file-based loading and in-memory dictionary creation.
+    """
+    process: ProcessConfig
+    evaluation: EvaluationConfig
+    reference_data: ReferenceDataConfig
+    endpoint: EndpointConfig
+    repository: RepositoryConfig
+
+    class Config:
+        extra = "allow"
+
+    @classmethod
+    def load(cls, path: str | None = None) -> "WorkflowConfig":
+        """
+        Load workflow configuration from a YAML/JSON file.
+
+        Args:
+            path (str): YAML/JSON configuration file path.
+
+        Returns:
+            WorkflowConfig: An instance of WorkflowConfig.
+        """
+        from levelapp.aspects.loader import DataLoader
+
+        loader = DataLoader()
+        config_dict = loader.load_raw_data(path=path)
+        logger.info(f"[{cls.__name__}] Workflow configuration loaded from '{path}' file content")
+        return cls.model_validate(config_dict)
+
+    @classmethod
+    def from_dict(cls, content: Dict[str, Any]) -> "WorkflowConfig":
+        """
+        Load workflow configuration from an in-memory dict.
+
+        Args:
+            content (dict): Workflow configuration content.
+
+        Returns:
+            WorkflowConfig: An instance of WorkflowConfig.
+        """
+        logger.info(f"[{cls.__name__}] Workflow configuration loaded from provided content")
+        return cls.model_validate(content)
+
+    def set_reference_data(self, content: Dict[str, Any]) -> None:
+        """
+        Load reference data from an in-memory dict.
+
+        Args:
+            content (dict): Reference data content.
+
+        """
+        self.reference_data.data = content
+        logger.info(f"[{self.__class__.__name__}] Reference data loaded from provided content")
+
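
The three entry points above (`load`, `from_dict`, `set_reference_data`) cover file-based and in-memory configuration. A minimal sketch of each, assuming the section layout shown in the README below; the file path and field values are placeholders:

```python
from levelapp.workflow.config import WorkflowConfig

# 1. From a YAML/JSON file on disk (placeholder path).
config = WorkflowConfig.load(path="workflow_config.yaml")

# 2. From an in-memory dict mirroring the YAML sections.
config = WorkflowConfig.from_dict(content={
    "process": {"project_name": "demo", "workflow_type": "SIMULATOR"},
    "evaluation": {"evaluators": ["JUDGE"]},
    "reference_data": {"path": None, "data": {}},
    "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
    "repository": {"type": "FIRESTORE"},
})

# 3. Reference data can be injected after construction.
config.set_reference_data(content={"scripts": []})
```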

levelapp/workflow/context.py ADDED

@@ -0,0 +1,62 @@
+"""levelapp/workflow/context.py: Builds runtime WorkflowContext from WorkflowConfig."""
+from typing import Dict, Callable
+
+from levelapp.workflow.config import WorkflowConfig
+from levelapp.core.base import BaseRepository, BaseEvaluator
+from levelapp.workflow.runtime import WorkflowContext
+from levelapp.core.schemas import EvaluatorType, RepositoryType
+
+from levelapp.repository.firestore import FirestoreRepository
+from levelapp.evaluator.evaluator import JudgeEvaluator, MetadataEvaluator
+
+
+class WorkflowContextBuilder:
+    """Builds a runtime WorkflowContext from a WorkflowConfig."""
+
+    def __init__(self, config: WorkflowConfig) -> None:
+        self.config = config
+
+        # Map repository type to constructor that accepts the WorkflowConfig
+        self.repository_map: Dict[RepositoryType, Callable[[WorkflowConfig], BaseRepository]] = {
+            RepositoryType.FIRESTORE: lambda cfg: FirestoreRepository(cfg),
+        }
+
+        # Map evaluator type to constructor that accepts the WorkflowConfig
+        self.evaluator_map: Dict[EvaluatorType, Callable[[WorkflowConfig], BaseEvaluator]] = {
+            EvaluatorType.JUDGE: lambda cfg: JudgeEvaluator(config=cfg),
+            EvaluatorType.REFERENCE: lambda cfg: MetadataEvaluator(config=cfg),
+        }
+
+    def build(self) -> WorkflowContext:
+        """
+        Build a runtime WorkflowContext from the static WorkflowConfig.
+        Supports in-memory reference data if provided.
+        """
+        # Repository instance
+        repository_type = self.config.repository.type
+        repository = self.repository_map.get(repository_type)(self.config)
+
+        # Evaluator instances
+        evaluators: Dict[EvaluatorType, BaseEvaluator] = {
+            ev: self.evaluator_map.get(ev)(self.config) for ev in self.config.evaluation.evaluators
+        }
+
+        # Providers and endpoint
+        providers = self.config.evaluation.providers
+        endpoint_config = self.config.endpoint
+
+        # Inputs include reference data path or in-memory dict
+        inputs = {}
+        if self.config.reference_data.data:
+            inputs["reference_data"] = self.config.reference_data.data
+        else:
+            inputs["reference_data_path"] = self.config.reference_data.path
+
+        return WorkflowContext(
+            config=self.config,
+            repository=repository,
+            evaluators=evaluators,
+            providers=providers,
+            endpoint_config=endpoint_config,
+            inputs=inputs,
+        )
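
Taken together with config.py, the intended pipeline appears to be config, then context, then workflow. A minimal sketch under that reading (the config path is a placeholder):

```python
from levelapp.workflow.config import WorkflowConfig
from levelapp.workflow.context import WorkflowContextBuilder
from levelapp.workflow.factory import MainFactory

config = WorkflowConfig.load(path="workflow_config.yaml")  # placeholder path
context = WorkflowContextBuilder(config).build()           # resolve repository and evaluators
workflow = MainFactory.create_workflow(context)            # dispatch on process.workflow_type
```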

levelapp/workflow/factory.py CHANGED

@@ -1,51 +1,42 @@
-from typing import Callable, Dict
-from levelapp.workflow.schemas import WorkflowType, RepositoryType, EvaluatorType, WorkflowConfig, WorkflowContext
-from levelapp.core.base import BaseRepository, BaseEvaluator
-from levelapp.workflow.base import BaseWorkflow
+"""levelapp/workflow/factory.py: Creates workflows using WorkflowContext."""
+from typing import Dict, Callable
 
-from levelapp.repository.firestore import FirestoreRepository
-from levelapp.evaluator.evaluator import JudgeEvaluator, MetadataEvaluator
+from levelapp.core.schemas import WorkflowType
+from levelapp.workflow.base import SimulatorWorkflow, ComparatorWorkflow, BaseWorkflow
+from levelapp.workflow.runtime import WorkflowContext
 
 
 class MainFactory:
-    """Central factory for repositories, evaluators, and workflows."""
-
-    _repository_map: dict[RepositoryType, Callable[[WorkflowConfig], BaseRepository]] = {
-        RepositoryType.FIRESTORE: lambda cfg: FirestoreRepository(),
-    }
-
-    _evaluator_map: dict[EvaluatorType, Callable[[WorkflowConfig], BaseEvaluator]] = {
-        EvaluatorType.JUDGE: lambda cfg: JudgeEvaluator(),
-        EvaluatorType.REFERENCE: lambda cfg: MetadataEvaluator(),
-        # Next is the RAG evaluator..
+    """Central factory for workflows."""
+    _workflow_map: Dict[WorkflowType, Callable[[WorkflowContext], BaseWorkflow]] = {
+        WorkflowType.SIMULATOR: lambda ctx: SimulatorWorkflow(ctx),
+        WorkflowType.COMPARATOR: lambda ctx: ComparatorWorkflow(ctx),
     }
 
-    _workflow_map: dict[WorkflowType, Callable[["WorkflowContext"], BaseWorkflow]] = {}
-
-    @classmethod
-    def create_repository(cls, config: WorkflowConfig) -> BaseRepository:
-        fn = cls._repository_map.get(config.repository)
-        if not fn:
-            raise NotImplementedError(f"Repository {config.repository} not implemented")
-        return fn(config)
-
-    @classmethod
-    def create_evaluator(cls, config: WorkflowConfig) -> Dict[EvaluatorType, BaseEvaluator]:
-        evaluators: dict[EvaluatorType, BaseEvaluator] = {}
-        for ev in config.evaluators:
-            fn = cls._evaluator_map.get(ev)
-            if not fn:
-                raise NotImplementedError(f"Evaluator {config.evaluators} not implemented")
-            evaluators[ev] = fn(config)
-        return evaluators
-
     @classmethod
-    def create_workflow(cls, wf_type: WorkflowType, context: "WorkflowContext") -> BaseWorkflow:
-        fn = cls._workflow_map.get(wf_type)
-        if not fn:
-            raise NotImplementedError(f"Workflow {wf_type} not implemented")
-        return fn(context)
+    def create_workflow(cls, context: WorkflowContext) -> BaseWorkflow:
+        """
+        Create a workflow using the given runtime context.
+
+        Args:
+            context (WorkflowContext): the provided workflow context.
+
+        Returns:
+            BaseWorkflow: the workflow instance built from the provided context.
+        """
+        wf_type = context.config.process.workflow_type
+        builder = cls._workflow_map.get(wf_type)
+        if not builder:
+            raise NotImplementedError(f"Workflow '{wf_type}' not implemented")
+        return builder(context)
 
     @classmethod
-    def register_workflow(cls, wf_type: WorkflowType, builder: Callable[["WorkflowContext"], BaseWorkflow]) -> None:
+    def register_workflow(cls, wf_type: WorkflowType, builder: Callable[[WorkflowContext], BaseWorkflow]) -> None:
+        """
+        Register a new workflow implementation.
+
+        Args:
+            wf_type (WorkflowType): the workflow type.
+            builder (Callable[[WorkflowContext], BaseWorkflow]): the workflow builder.
+        """
         cls._workflow_map[wf_type] = builder
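
Since `_workflow_map` is a mutable class attribute, `register_workflow` lets downstream code add workflow types beyond the two built-ins (the next hunk shows the package doing exactly this at import time). A sketch, assuming a hypothetical `WorkflowType.ASSESSOR` member (the README mentions an ASSESSOR workflow) and an illustrative subclass:

```python
from typing import Any, Dict

from levelapp.core.base import BaseProcess
from levelapp.core.schemas import WorkflowType
from levelapp.workflow.base import BaseWorkflow
from levelapp.workflow.factory import MainFactory
from levelapp.workflow.runtime import WorkflowContext


class AssessorWorkflow(BaseWorkflow):
    """Illustrative custom workflow; not part of the package."""

    def __init__(self, context: WorkflowContext) -> None:
        super().__init__(name="Assessor", context=context)

    def _setup_process(self, context: WorkflowContext) -> BaseProcess:
        raise NotImplementedError  # supply a real BaseProcess here

    def _load_input_data(self, context: WorkflowContext) -> Dict[str, Any]:
        return {}


# Registration makes create_workflow() able to dispatch on the new type.
MainFactory.register_workflow(WorkflowType.ASSESSOR, lambda ctx: AssessorWorkflow(ctx))
```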

@@ -1,5 +1,5 @@
+from levelapp.core.schemas import WorkflowType
 from levelapp.workflow.factory import MainFactory
-from levelapp.workflow.schemas import WorkflowType
 from levelapp.workflow.base import SimulatorWorkflow, ComparatorWorkflow
 
 MainFactory.register_workflow(WorkflowType.SIMULATOR, lambda ctx: SimulatorWorkflow(ctx))

levelapp/workflow/runtime.py ADDED

@@ -0,0 +1,19 @@
+"""levelapp/workflow/runtime.py: contains the workflow runtime context component."""
+from dataclasses import dataclass
+from typing import Dict, List, Any
+
+from levelapp.config import EndpointConfig
+from levelapp.core.base import BaseRepository, BaseEvaluator
+from levelapp.workflow.config import WorkflowConfig
+from levelapp.core.schemas import EvaluatorType
+
+
+@dataclass(frozen=True)
+class WorkflowContext:
+    """Immutable data holder for workflow execution context."""
+    config: WorkflowConfig
+    repository: BaseRepository
+    evaluators: Dict[EvaluatorType, BaseEvaluator]
+    providers: List[str]
+    endpoint_config: EndpointConfig
+    inputs: Dict[str, Any]
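
Because the context is declared with `frozen=True`, attribute assignment after construction raises `dataclasses.FrozenInstanceError`; a modified copy has to be made with `dataclasses.replace`. A small illustration, where the `None` placeholders stand in for real config/repository instances (dataclass fields are not type-checked at runtime):

```python
import dataclasses

from levelapp.workflow.runtime import WorkflowContext

ctx = WorkflowContext(
    config=None, repository=None, evaluators={},  # placeholders for illustration only
    providers=["openai"], endpoint_config=None, inputs={},
)

try:
    ctx.inputs = {"reference_data": {}}  # rejected: the dataclass is frozen
except dataclasses.FrozenInstanceError:
    ctx = dataclasses.replace(ctx, inputs={"reference_data": {}})  # build a copy instead
```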

{levelapp-0.1.0.dist-info → levelapp-0.1.2.dist-info}/METADATA CHANGED

@@ -1,12 +1,12 @@
 Metadata-Version: 2.4
 Name: levelapp
-Version: 0.1.0
+Version: 0.1.2
 Summary: LevelApp is an evaluation framework for AI/LLM-based software application. [Powered by Norma]
 Project-URL: Homepage, https://github.com/levelapp-org
 Project-URL: Repository, https://github.com/levelapp-org/levelapp-framework
 Project-URL: Documentation, https://levelapp.readthedocs.io
 Project-URL: Issues, https://github.com/levelapp-org/levelapp-framework/issues
-Author-email: KadriSof <kadrisofyen@gmail.com>
+Author-email: Mohamed Sofiene KADRI <ms.kadri.dev@gmail.com>
 License-File: LICENSE
 Keywords: ai,evaluation,framework,llm,testing
 Classifier: Development Status :: 3 - Alpha
@@ -17,10 +17,12 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Testing
 Requires-Python: >=3.12
-Requires-Dist: arrow>=1.3.0
+Requires-Dist: google-api-core>=2.25.1
+Requires-Dist: google-auth>=2.40.3
+Requires-Dist: google-cloud-firestore>=2.21.0
 Requires-Dist: httpx>=0.28.1
+Requires-Dist: humanize>=4.13.0
 Requires-Dist: numpy>=2.3.2
-Requires-Dist: openai>=1.99.9
 Requires-Dist: pandas-stubs==2.3.0.250703
 Requires-Dist: pandas>=2.3.1
 Requires-Dist: pydantic>=2.11.7
@@ -91,38 +93,47 @@ pip install levelapp
 LevelApp uses a YAML configuration file to define the evaluation setup. Create a `workflow_config.yaml` with the following structure:
 
 ```yaml
-project_name: "test-project"
-evaluation_params:
-  attempts: 1 # Number of simulation attempts.
+process:
+  project_name: "test-project"
+  workflow_type: SIMULATOR # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
+  evaluation_params:
+    attempts: 1 # Add the number of simulation attempts.
+    batch_size: 5
+
+evaluation:
+  evaluators: # Select from the following: JUDGE, REFERENCE, RAG.
+    - JUDGE
+    - REFERENCE
+  providers:
+    - openai
+    - ionos
+  metrics_map:
+    field_1: EXACT
+    field_2: LEVENSHTEIN
 
-workflow: SIMULATOR # SIMULATOR, COMPARATOR, ASSESSOR.
-repository: FIRESTORE # FIRESTORE, FILESYSTEM, MONGODB.
-evaluators: # JUDGE, REFERENCE, RAG.
-  - JUDGE
-  - REFERENCE
+reference_data:
+  path:
+  data:
 
-endpoint_configuration:
+endpoint:
   base_url: "http://127.0.0.1:8000"
   url_path: ''
   api_key: "<API-KEY>"
   bearer_token: "<BEARER-TOKEN>"
   model_id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  payload_path: "../../src/data/payload_example_1.yaml"
   default_request_payload_template:
+    # Change the user message field name only according to the request payload schema (example: 'prompt' to 'message').
     prompt: "${user_message}"
     details: "${request_payload}" # Rest of the request payload data.
   default_response_payload_template:
+    # Change the placeholder value only according to the response payload schema (example: ${agent_reply} to ${reply}).
     agent_reply: "${agent_reply}"
-    guardrail_flag: "${guardrail_flag}"
     generated_metadata: "${generated_metadata}"
 
-reference_data:
-  source: LOCAL # LOCAL or REMOTE.
-  path: "../../src/data/conversation_example_1.json"
-
-metrics_map:
-  field_1: EXACT
-  field_2: LEVENSHTEIN
+repository:
+  type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM
+  project_id: "(default)"
+  database_name: ""
 ```
 
 - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, auth, payload templates).
@@ -133,33 +144,26 @@ For conversation scripts (used in Simulator), provide a JSON file with this schema:
 
 ```json
 {
-  "id": "1fa6f6ed-3cfe-4c0b-b389-7292f58879d4",
   "scripts": [
     {
-      "id": "65f58cec-d55d-4a24-bf16-fa8327a3aa6b",
       "interactions": [
         {
-          "id": "e99a2898-6a79-4a20-ac85-dfe977ea9935",
           "user_message": "Hello, I would like to book an appointment with a doctor.",
           "reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
           "interaction_type": "initial",
           "reference_metadata": {},
-          "generated_metadata": {},
           "guardrail_flag": false,
           "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
         },
         {
-          "id": "fe5c539a-d0a1-40ee-97bd-dbe456703ccc",
           "user_message": "I need to see a cardiologist.",
           "reference_reply": "When would you like to schedule your appointment?",
           "interaction_type": "intermediate",
           "reference_metadata": {},
-          "generated_metadata": {},
           "guardrail_flag": false,
           "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
         },
         {
-          "id": "2cfdbd1c-a065-48bb-9aa9-b958342154b1",
           "user_message": "I would like to book it for next Monday morning.",
           "reference_reply": "We have an available slot at 10 AM next Monday. Does that work for you?",
           "interaction_type": "intermediate",
@@ -168,11 +172,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this schema:
             "date": "next Monday",
             "time": "10 AM"
           },
-          "generated_metadata": {
-            "appointment_type": "Cardiology",
-            "date": "next Monday",
-            "time": "morning"
-          },
           "guardrail_flag": false,
           "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
         },
@@ -182,7 +181,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this schema:
           "reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
           "interaction_type": "final",
           "reference_metadata": {},
-          "generated_metadata": {},
           "guardrail_flag": false,
           "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
         }
@@ -195,9 +193,22 @@ For conversation scripts (used in Simulator), provide a JSON file with this schema:
   ]
 }
 ```
-
 - **Fields**: Include user messages, reference replies, metadata for comparison, guardrail flags, and request payloads.
 
+In the `.env` file, add the credentials for the LLM providers that will be used during the evaluation process:
+```
+OPENAI_API_KEY=
+IONOS_API_KEY=
+ANTHROPIC_API_KEY=
+MISTRAL_API_KEY=
+
+# For IONOS, you must include the base URL and the model ID.
+IONOS_BASE_URL="https://inference.de-txl.ionos.com"
+IONOS_MODEL_ID="0b6c4a15-bb8d-4092-82b0-f357b77c59fd"
+
+WORKFLOW_CONFIG_PATH="../../src/data/workflow_config_1.yaml"
+```
+
 ## Usage Example
 
 To run an evaluation:
@@ -207,14 +218,14 @@ To run an evaluation:
 
 ```python
 if __name__ == "__main__":
-    from levelapp.workflow.schemas import WorkflowConfig
+    from levelapp.workflow import WorkflowConfig
     from levelapp.core.session import EvaluationSession
 
     # Load configuration from YAML
    config = WorkflowConfig.load(path="../data/workflow_config.yaml")
 
-    # Run evaluation session
-    with EvaluationSession(session_name="sim-test", workflow_config=config) as session:
+    # Run evaluation session (you can enable/disable the monitoring aspect)
+    with EvaluationSession(session_name="test-session-1", workflow_config=config, enable_monitoring=False) as session:
         session.run()
         results = session.workflow.collect_results()
         print("Results:", results)
@@ -223,6 +234,58 @@ if __name__ == "__main__":
     print(f"session stats:\n{stats}")
 ```
 
+Alternatively, if you want to pass the configuration and reference data as in-memory variables,
+you can load them manually as follows:
+```python
+if __name__ == "__main__":
+    from levelapp.workflow import WorkflowConfig
+    from levelapp.core.session import EvaluationSession
+
+
+    config_dict = {
+        "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
+        "evaluation": {"evaluators": ["JUDGE", "REFERENCE"], "providers": ["openai", "ionos"], "metrics_map": {"field_1": "EXACT"}},
+        "reference_data": {"path": "", "data": {}},
+        "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
+        "repository": {"type": "FIRESTORE", "source": "IN_MEMORY"},
+    }
+
+    content = {
+        "scripts": [
+            {
+                "interactions": [
+                    {
+                        "user_message": "Hello!",
+                        "reference_reply": "Hello, how can I help you!"
+                    },
+                    {
+                        "user_message": "I need an apartment",
+                        "reference_reply": "sorry, but I can only assist you with booking medical appointments."
+                    },
+                ]
+            },
+        ]
+    }
+
+    # Load configuration from a dict variable
+    config = WorkflowConfig.from_dict(content=config_dict)
+
+    # Load reference data from a dict variable
+    config.set_reference_data(content=content)
+
+    evaluation_session = EvaluationSession(session_name="test-session-2", workflow_config=config)
+
+    with evaluation_session as session:
+        session.run()
+        results = session.workflow.collect_results()
+        print("Results:", results)
+
+        stats = session.get_stats()
+        print(f"session stats:\n{stats}")
+```
+
 - This loads the config, runs the specified workflow (e.g., Simulator), collects results, and prints stats.
 
 For more examples, see the `examples/` directory.