levelapp 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of levelapp might be problematic. Click here for more details.

levelapp/workflow/base.py CHANGED
@@ -9,8 +9,8 @@ from typing import Any
9
9
  from levelapp.core.base import BaseProcess
10
10
  from levelapp.simulator.schemas import ScriptsBatch
11
11
  from levelapp.simulator.simulator import ConversationSimulator
12
+ from levelapp.workflow.runtime import WorkflowContext
12
13
  from levelapp.aspects.loader import DataLoader
13
- from levelapp.workflow.schemas import WorkflowContext
14
14
 
15
15
 
16
16
  class BaseWorkflow(ABC):
@@ -28,6 +28,7 @@ class BaseWorkflow(ABC):
28
28
  """Validate and initialize workflow-specific settings."""
29
29
  if self._initialized:
30
30
  return
31
+
31
32
  self.process = self._setup_process(context=self.context)
32
33
  self._initialized = True
33
34
 
@@ -56,7 +57,7 @@ class BaseWorkflow(ABC):
56
57
  else:
57
58
  loop = asyncio.get_running_loop()
58
59
  func = partial(self.process.run, **self._input_data)
59
- self._results = await loop.run_in_executor(None, func)
60
+ self._results = await loop.run_in_executor(None, func, None)
60
61
 
61
62
  def collect_results(self) -> Any:
62
63
  """Return unified results structure."""
@@ -80,34 +81,45 @@ class SimulatorWorkflow(BaseWorkflow):
80
81
  simulator.setup(
81
82
  repository=context.repository,
82
83
  evaluators=context.evaluators,
84
+ providers=context.providers,
83
85
  endpoint_config=context.endpoint_config,
84
86
  )
85
87
  return simulator
86
88
 
87
89
  def _load_input_data(self, context: WorkflowContext) -> Any:
88
90
  loader = DataLoader()
89
- reference_data_path = context.inputs.get("reference_data_path", "no-path-provided")
90
- file_path = Path(reference_data_path)
91
+ if "reference_data" in context.inputs:
92
+ data_config = context.inputs["reference_data"]
93
+ else:
94
+ reference_data_path = context.inputs.get("reference_data_path", "no-path-provided")
95
+
96
+ if not reference_data_path:
97
+ raise RuntimeError(f"[{self.name}] No reference data available.")
98
+
99
+ file_path = Path(reference_data_path)
91
100
 
92
- if not file_path.exists():
93
- raise FileNotFoundError(f"[{self.name}] Reference data file not found.")
101
+ if not file_path.exists():
102
+ raise FileNotFoundError(f"[{self.name}] Reference data file not found.")
103
+
104
+ data_config = loader.load_raw_data(path=reference_data_path)
94
105
 
95
- evaluation_params = context.inputs.get("evaluation_params", {})
96
- data_config = loader.load_raw_data(path=reference_data_path)
97
106
  try:
98
107
  scripts_batch = ScriptsBatch.model_validate(data_config)
108
+
99
109
  except ValidationError as e:
100
110
  raise RuntimeError(f"[{self.name}] Validation error: {e}")
101
111
 
102
- return {"test_batch": scripts_batch, "attempts": evaluation_params.get("attempts", 1)}
112
+ attempts = context.config.process.evaluation_params.get("attempts", 1)
113
+
114
+ return {"test_batch": scripts_batch, "attempts": attempts}
103
115
 
104
116
 
105
117
  class ComparatorWorkflow(BaseWorkflow):
118
+ def __init__(self, context: WorkflowContext) -> None:
119
+ super().__init__(name="MetadataComparator", context=context)
120
+
106
121
  def _setup_process(self, context: WorkflowContext) -> BaseProcess:
107
- pass
122
+ raise NotImplementedError
108
123
 
109
124
  def _load_input_data(self, context: WorkflowContext) -> Any:
110
- pass
111
-
112
- def __init__(self, context: WorkflowContext) -> None:
113
- super().__init__(name="MetadataComparator", context=context)
125
+ raise NotImplementedError
@@ -0,0 +1,65 @@
1
+ """levelapp/workflow/config.py: Contains modular workflow configuration components."""
2
+ from typing import List, Dict, Any, Optional
3
+ from pydantic import BaseModel, Field
4
+
5
+ from levelapp.config.endpoint import EndpointConfig
6
+ from levelapp.core.schemas import WorkflowType, RepositoryType, EvaluatorType
7
+
8
+
9
+ class ProcessConfig(BaseModel):
10
+ project_name: str
11
+ workflow_type: WorkflowType
12
+ evaluation_params: Dict[str, Any] = Field(default_factory=dict)
13
+
14
+
15
+ class EvaluationConfig(BaseModel):
16
+ evaluators: List[EvaluatorType]
17
+ providers: List[str] = Field(default_factory=list)
18
+ metrics_map: Dict[str, str] | None = Field(default_factory=dict)
19
+
20
+
21
+ class ReferenceDataConfig(BaseModel):
22
+ path: str | None
23
+ data: Dict[str, Any] | None = Field(default_factory=dict)
24
+
25
+
26
+ class RepositoryConfig(BaseModel):
27
+ type: RepositoryType | None = None
28
+ project_id: str | None = None
29
+ database_name: str = Field(default="(default)")
30
+
31
+ class Config:
32
+ extra = "allow"
33
+
34
+
35
+ class WorkflowConfig(BaseModel):
36
+ """
37
+ Static workflow configuration. Maps directly to YAML sections.
38
+ Supports both file-based loading and in-memory dictionary creation.
39
+ """
40
+ process: ProcessConfig
41
+ evaluation: EvaluationConfig
42
+ reference_data: ReferenceDataConfig
43
+ endpoint: EndpointConfig
44
+ repository: RepositoryConfig
45
+
46
+ class Config:
47
+ extra = "allow"
48
+
49
+ @classmethod
50
+ def load(cls, path: Optional[str] = None) -> "WorkflowConfig":
51
+ """Load workflow configuration from a YAML/JSON file."""
52
+ from levelapp.aspects.loader import DataLoader
53
+
54
+ loader = DataLoader()
55
+ config_dict = loader.load_raw_data(path=path)
56
+ return cls.model_validate(config_dict)
57
+
58
+ @classmethod
59
+ def from_dict(cls, content: Dict[str, Any]) -> "WorkflowConfig":
60
+ """Load workflow configuration from an in-memory dict."""
61
+ return cls.model_validate(content)
62
+
63
+ def set_reference_data(self, content: Dict[str, Any]) -> None:
64
+ """Load reference data from an in-memory dict."""
65
+ self.reference_data.data = content
@@ -0,0 +1,63 @@
1
+ """levelapp/workflow/context.py: Builds runtime WorkflowContext from WorkflowConfig."""
2
+ from typing import Dict, Callable
3
+
4
+ from levelapp.workflow.config import WorkflowConfig
5
+ from levelapp.core.base import BaseRepository, BaseEvaluator
6
+ from levelapp.workflow.runtime import WorkflowContext
7
+ from levelapp.core.schemas import EvaluatorType, RepositoryType
8
+
9
+ from levelapp.repository.firestore import FirestoreRepository
10
+ from levelapp.evaluator.evaluator import JudgeEvaluator, MetadataEvaluator
11
+
12
+
13
+ class WorkflowContextBuilder:
14
+ """Builds a runtime WorkflowContext from a WorkflowConfig."""
15
+
16
+ def __init__(self, config: WorkflowConfig) -> None:
17
+ self.config = config
18
+
19
+ # Map repository type to constructor that accepts the WorkflowConfig
20
+ self.repository_map: Dict[RepositoryType, Callable[[WorkflowConfig], BaseRepository]] = {
21
+ RepositoryType.FIRESTORE: lambda cfg: FirestoreRepository(cfg),
22
+ }
23
+
24
+ # Map evaluator type to constructor that accepts the WorkflowConfig
25
+ self.evaluator_map: Dict[EvaluatorType, Callable[[WorkflowConfig], BaseEvaluator]] = {
26
+ EvaluatorType.JUDGE: lambda cfg: JudgeEvaluator(config=cfg),
27
+ EvaluatorType.REFERENCE: lambda cfg: MetadataEvaluator(config=cfg),
28
+ }
29
+
30
+ def build(self) -> WorkflowContext:
31
+ """
32
+ Build a runtime WorkflowContext from the static WorkflowConfig.
33
+ Supports in-memory reference data if provided.
34
+ """
35
+ # Repository instance
36
+ repository_type = self.config.repository.type
37
+ repository = self.repository_map.get(repository_type)(self.config)
38
+
39
+ # Evaluator instances
40
+ evaluators: Dict[EvaluatorType, BaseEvaluator] = {
41
+ ev: self.evaluator_map.get(ev)(self.config) for ev in self.config.evaluation.evaluators
42
+ }
43
+
44
+ # Providers and endpoint
45
+ providers = self.config.evaluation.providers
46
+ endpoint_config = self.config.endpoint
47
+
48
+ # Inputs include reference data path or in-memory dict
49
+ inputs = {}
50
+ if self.config.reference_data.data:
51
+ inputs["reference_data"] = self.config.reference_data.data
52
+ else:
53
+ inputs["reference_data_path"] = self.config.reference_data.path
54
+ print(f"[WorkflowContextBuilder] reference data path: {inputs['reference_data_path']}")
55
+
56
+ return WorkflowContext(
57
+ config=self.config,
58
+ repository=repository,
59
+ evaluators=evaluators,
60
+ providers=providers,
61
+ endpoint_config=endpoint_config,
62
+ inputs=inputs,
63
+ )
@@ -1,51 +1,29 @@
1
- from typing import Callable, Dict
2
- from levelapp.workflow.schemas import WorkflowType, RepositoryType, EvaluatorType, WorkflowConfig, WorkflowContext
3
- from levelapp.core.base import BaseRepository, BaseEvaluator
4
- from levelapp.workflow.base import BaseWorkflow
1
+ """levelapp/workflow/factory.py: Creates workflows using WorkflowContext."""
2
+ from typing import Dict, Callable
5
3
 
6
- from levelapp.repository.firestore import FirestoreRepository
7
- from levelapp.evaluator.evaluator import JudgeEvaluator, MetadataEvaluator
4
+ from levelapp.core.schemas import WorkflowType
5
+ from levelapp.workflow.base import SimulatorWorkflow, ComparatorWorkflow, BaseWorkflow
6
+ from levelapp.workflow.runtime import WorkflowContext
8
7
 
9
8
 
10
9
  class MainFactory:
11
- """Central factory for repositories, evaluators, and workflows."""
10
+ """Central factory for workflows."""
12
11
 
13
- _repository_map: dict[RepositoryType, Callable[[WorkflowConfig], BaseRepository]] = {
14
- RepositoryType.FIRESTORE: lambda cfg: FirestoreRepository(),
12
+ _workflow_map: Dict[WorkflowType, Callable[[WorkflowContext], BaseWorkflow]] = {
13
+ WorkflowType.SIMULATOR: lambda ctx: SimulatorWorkflow(ctx),
14
+ WorkflowType.COMPARATOR: lambda ctx: ComparatorWorkflow(ctx),
15
15
  }
16
16
 
17
- _evaluator_map: dict[EvaluatorType, Callable[[WorkflowConfig], BaseEvaluator]] = {
18
- EvaluatorType.JUDGE: lambda cfg: JudgeEvaluator(),
19
- EvaluatorType.REFERENCE: lambda cfg: MetadataEvaluator(),
20
- # Next is the RAG evaluator..
21
- }
22
-
23
- _workflow_map: dict[WorkflowType, Callable[["WorkflowContext"], BaseWorkflow]] = {}
24
-
25
- @classmethod
26
- def create_repository(cls, config: WorkflowConfig) -> BaseRepository:
27
- fn = cls._repository_map.get(config.repository)
28
- if not fn:
29
- raise NotImplementedError(f"Repository {config.repository} not implemented")
30
- return fn(config)
31
-
32
- @classmethod
33
- def create_evaluator(cls, config: WorkflowConfig) -> Dict[EvaluatorType, BaseEvaluator]:
34
- evaluators: dict[EvaluatorType, BaseEvaluator] = {}
35
- for ev in config.evaluators:
36
- fn = cls._evaluator_map.get(ev)
37
- if not fn:
38
- raise NotImplementedError(f"Evaluator {config.evaluators} not implemented")
39
- evaluators[ev] = fn(config)
40
- return evaluators
41
-
42
17
  @classmethod
43
- def create_workflow(cls, wf_type: WorkflowType, context: "WorkflowContext") -> BaseWorkflow:
44
- fn = cls._workflow_map.get(wf_type)
45
- if not fn:
46
- raise NotImplementedError(f"Workflow {wf_type} not implemented")
47
- return fn(context)
18
+ def create_workflow(cls, context: WorkflowContext) -> BaseWorkflow:
19
+ """Create workflow using the given runtime context."""
20
+ wf_type = context.config.process.workflow_type
21
+ builder = cls._workflow_map.get(wf_type)
22
+ if not builder:
23
+ raise NotImplementedError(f"Workflow '{wf_type}' not implemented")
24
+ return builder(context)
48
25
 
49
26
  @classmethod
50
- def register_workflow(cls, wf_type: WorkflowType, builder: Callable[["WorkflowContext"], BaseWorkflow]) -> None:
27
+ def register_workflow(cls, wf_type: WorkflowType, builder: Callable[[WorkflowContext], BaseWorkflow]) -> None:
28
+ """Register a new workflow implementation."""
51
29
  cls._workflow_map[wf_type] = builder
@@ -1,5 +1,5 @@
1
+ from levelapp.core.schemas import WorkflowType
1
2
  from levelapp.workflow.factory import MainFactory
2
- from levelapp.workflow.schemas import WorkflowType
3
3
  from levelapp.workflow.base import SimulatorWorkflow, ComparatorWorkflow
4
4
 
5
5
  MainFactory.register_workflow(WorkflowType.SIMULATOR, lambda ctx: SimulatorWorkflow(ctx))
@@ -0,0 +1,19 @@
1
+ """levelapp/workflow/runtime.py: contains the workflow runtime context component."""
2
+ from dataclasses import dataclass
3
+ from typing import Dict, List, Any
4
+
5
+ from levelapp.config import EndpointConfig
6
+ from levelapp.core.base import BaseRepository, BaseEvaluator
7
+ from levelapp.workflow.config import WorkflowConfig
8
+ from levelapp.core.schemas import EvaluatorType
9
+
10
+
11
+ @dataclass(frozen=True)
12
+ class WorkflowContext:
13
+ """Immutable data holder for workflow execution context."""
14
+ config: WorkflowConfig
15
+ repository: BaseRepository
16
+ evaluators: Dict[EvaluatorType, BaseEvaluator]
17
+ providers: List[str]
18
+ endpoint_config: EndpointConfig
19
+ inputs: Dict[str, Any]
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: levelapp
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: LevelApp is an evaluation framework for AI/LLM-based software applications. [Powered by Norma]
5
5
  Project-URL: Homepage, https://github.com/levelapp-org
6
6
  Project-URL: Repository, https://github.com/levelapp-org/levelapp-framework
7
7
  Project-URL: Documentation, https://levelapp.readthedocs.io
8
8
  Project-URL: Issues, https://github.com/levelapp-org/levelapp-framework/issues
9
- Author-email: KadriSof <kadrisofyen@gmail.com>
9
+ Author-email: Mohamed Sofiene KADRI <ms.kadri.dev@gmail.com>
10
10
  License-File: LICENSE
11
11
  Keywords: ai,evaluation,framework,llm,testing
12
12
  Classifier: Development Status :: 3 - Alpha
@@ -18,7 +18,11 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
18
  Classifier: Topic :: Software Development :: Testing
19
19
  Requires-Python: >=3.12
20
20
  Requires-Dist: arrow>=1.3.0
21
+ Requires-Dist: google-api-core>=2.25.1
22
+ Requires-Dist: google-auth>=2.40.3
23
+ Requires-Dist: google-cloud-firestore>=2.21.0
21
24
  Requires-Dist: httpx>=0.28.1
25
+ Requires-Dist: humanize>=4.13.0
22
26
  Requires-Dist: numpy>=2.3.2
23
27
  Requires-Dist: openai>=1.99.9
24
28
  Requires-Dist: pandas-stubs==2.3.0.250703
@@ -91,38 +95,47 @@ pip install levelapp
91
95
  LevelApp uses a YAML configuration file to define the evaluation setup. Create a `workflow_config.yaml` with the following structure:
92
96
 
93
97
  ```yaml
94
- project_name: "test-project"
95
- evaluation_params:
96
- attempts: 1 # Number of simulation attempts.
98
+ process:
99
+ project_name: "test-project"
100
+ workflow_type: SIMULATOR # Pick one of the following workflows: SIMULATOR, COMPARATOR, ASSESSOR.
101
+ evaluation_params:
102
+ attempts: 1 # Add the number of simulation attempts.
103
+ batch_size: 5
104
+
105
+ evaluation:
106
+ evaluators: # Select from the following: JUDGE, REFERENCE, RAG.
107
+ - JUDGE
108
+ - REFERENCE
109
+ providers:
110
+ - openai
111
+ - ionos
112
+ metrics_map:
113
+ field_1: EXACT
114
+ field_2 : LEVENSHTEIN
97
115
 
98
- workflow: SIMULATOR # SIMULATOR, COMPARATOR, ASSESSOR.
99
- repository: FIRESTORE # FIRESTORE, FILESYSTEM, MONGODB.
100
- evaluators: # JUDGE, REFERENCE, RAG.
101
- - JUDGE
102
- - REFERENCE
116
+ reference_data:
117
+ path:
118
+ data:
103
119
 
104
- endpoint_configuration:
120
+ endpoint:
105
121
  base_url: "http://127.0.0.1:8000"
106
122
  url_path: ''
107
123
  api_key: "<API-KEY>"
108
124
  bearer_token: "<BEARER-TOKEN>"
109
125
  model_id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
110
- payload_path: "../../src/data/payload_example_1.yaml"
111
126
  default_request_payload_template:
127
+ # Change the user message field name only according to the request payload schema (example: 'prompt' to 'message').
112
128
  prompt: "${user_message}"
113
129
  details: "${request_payload}" # Rest of the request payload data.
114
130
  default_response_payload_template:
131
+ # Change the placeholder value only according to the response payload schema (example: ${agent_reply} to ${reply}).
115
132
  agent_reply: "${agent_reply}"
116
- guardrail_flag: "${guardrail_flag}"
117
133
  generated_metadata: "${generated_metadata}"
118
134
 
119
- reference_data:
120
- source: LOCAL # LOCAL or REMOTE.
121
- path: "../../src/data/conversation_example_1.json"
122
-
123
- metrics_map:
124
- field_1: EXACT
125
- field_2: LEVENSHTEIN
135
+ repository:
136
+ type: FIRESTORE # Pick one of the following: FIRESTORE, FILESYSTEM, MONGODB.
137
+ project_id: "(default)"
138
+ database_name: ""
126
139
  ```
127
140
 
128
141
  - **Endpoint Configuration**: Define how to interact with your LLM-based system (base URL, auth, payload templates).
@@ -133,33 +146,26 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
133
146
 
134
147
  ```json
135
148
  {
136
- "id": "1fa6f6ed-3cfe-4c0b-b389-7292f58879d4",
137
149
  "scripts": [
138
150
  {
139
- "id": "65f58cec-d55d-4a24-bf16-fa8327a3aa6b",
140
151
  "interactions": [
141
152
  {
142
- "id": "e99a2898-6a79-4a20-ac85-dfe977ea9935",
143
153
  "user_message": "Hello, I would like to book an appointment with a doctor.",
144
154
  "reference_reply": "Sure, I can help with that. Could you please specify the type of doctor you need to see?",
145
155
  "interaction_type": "initial",
146
156
  "reference_metadata": {},
147
- "generated_metadata": {},
148
157
  "guardrail_flag": false,
149
158
  "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
150
159
  },
151
160
  {
152
- "id": "fe5c539a-d0a1-40ee-97bd-dbe456703ccc",
153
161
  "user_message": "I need to see a cardiologist.",
154
162
  "reference_reply": "When would you like to schedule your appointment?",
155
163
  "interaction_type": "intermediate",
156
164
  "reference_metadata": {},
157
- "generated_metadata": {},
158
165
  "guardrail_flag": false,
159
166
  "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
160
167
  },
161
168
  {
162
- "id": "2cfdbd1c-a065-48bb-9aa9-b958342154b1",
163
169
  "user_message": "I would like to book it for next Monday morning.",
164
170
  "reference_reply": "We have an available slot at 10 AM next Monday. Does that work for you?",
165
171
  "interaction_type": "intermediate",
@@ -168,11 +174,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
168
174
  "date": "next Monday",
169
175
  "time": "10 AM"
170
176
  },
171
- "generated_metadata": {
172
- "appointment_type": "Cardiology",
173
- "date": "next Monday",
174
- "time": "morning"
175
- },
176
177
  "guardrail_flag": false,
177
178
  "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
178
179
  },
@@ -182,7 +183,6 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
182
183
  "reference_reply": "Your appointment with the cardiologist is booked for 10 AM next Monday. Is there anything else I can help you with?",
183
184
  "interaction_type": "final",
184
185
  "reference_metadata": {},
185
- "generated_metadata": {},
186
186
  "guardrail_flag": false,
187
187
  "request_payload": {"user_id": "0001", "user_role": "ADMIN"}
188
188
  }
@@ -195,9 +195,22 @@ For conversation scripts (used in Simulator), provide a JSON file with this sche
195
195
  ]
196
196
  }
197
197
  ```
198
-
199
198
  - **Fields**: Include user messages, reference replies, metadata for comparison, guardrail flags, and request payloads.
200
199
 
200
+ In the `.env` file, add the credentials of the LLM providers that will be used for the evaluation process.
201
+ ```
202
+ OPENAI_API_KEY=
203
+ IONOS_API_KEY=
204
+ ANTHROPIC_API_KEY=
205
+ MISTRAL_API_KEY=
206
+
207
+ # For IONOS, you must include the base URL and the model ID.
208
+ IONOS_BASE_URL="https://inference.de-txl.ionos.com"
209
+ IONOS_MODEL_ID="0b6c4a15-bb8d-4092-82b0-f357b77c59fd"
210
+
211
+ WORKFLOW_CONFIG_PATH="../../src/data/workflow_config_1.yaml"
212
+ ```
213
+
201
214
  ## Usage Example
202
215
 
203
216
  To run an evaluation:
@@ -214,15 +227,68 @@ if __name__ == "__main__":
214
227
  config = WorkflowConfig.load(path="../data/workflow_config.yaml")
215
228
 
216
229
  # Run evaluation session
217
- with EvaluationSession(session_name="sim-test", workflow_config=config) as session:
230
+ with EvaluationSession(session_name="test-session-1", workflow_config=config) as session:
231
+ session.run()
232
+ results = session.workflow.collect_results()
233
+ print("Results:", results)
234
+
235
+ stats = session.get_stats()
236
+ print(f"session stats:\n{stats}")
237
+ ```
238
+
239
+ Alternatively, if you want to pass the configuration and reference data from in-memory variables,
240
+ you can manually load the data as follows:
241
+ ```python
242
+ if __name__ == "__main__":
243
+ from levelapp.workflow import WorkflowConfig
244
+ from levelapp.core.session import EvaluationSession
245
+
246
+ # Firestore -> retrieve endpoint config -> data => config_dict
247
+
248
+ config_dict = {
249
+ "process": {"project_name": "test-project", "workflow_type": "SIMULATOR", "evaluation_params": {"attempts": 2}},
250
+ "evaluation": {"evaluators": ["JUDGE"], "providers": ["openai", "ionos"]},
251
+ "reference_data": {"path": "", "data": {}},
252
+ "endpoint": {"base_url": "http://127.0.0.1:8000", "api_key": "key", "model_id": "model"},
253
+ "repository": {"type": "FIRESTORE", "source": "IN_MEMORY", "metrics_map": {"field_1": "EXACT"}},
254
+ }
255
+
256
+ content = {
257
+ "scripts": [
258
+ {
259
+ "interactions": [
260
+ {
261
+ "user_message": "Hello!",
262
+ "reference_reply": "Hello, how can I help you!"
263
+ },
264
+ {
265
+ "user_message": "I need an apartment",
266
+ "reference_reply": "sorry, but I can only assist you with booking medical appointments."
267
+ },
268
+ ]
269
+ },
270
+ ]
271
+ }
272
+
273
+ # Load configuration from a dict variable
274
+ config = WorkflowConfig.from_dict(content=config_dict)
275
+
276
+ # Load reference data from dict variable
277
+ config.set_reference_data(content=content)
278
+
279
+ evaluation_session = EvaluationSession(session_name="test-session-2", workflow_config=config)
280
+
281
+ with evaluation_session as session:
218
282
  session.run()
219
283
  results = session.workflow.collect_results()
220
284
  print("Results:", results)
221
285
 
222
286
  stats = session.get_stats()
223
287
  print(f"session stats:\n{stats}")
288
+
224
289
  ```
225
290
 
291
+
226
292
  - This loads the config, runs the specified workflow (e.g., Simulator), collects results, and prints stats.
227
293
 
228
294
  For more examples, see the `examples/` directory.
@@ -20,9 +20,10 @@ levelapp/config/endpoint.py,sha256=ll34rZ0KRmUwI81EWJ3HX9i6pziq2YrQb84kv4ErymI,7
20
20
  levelapp/config/prompts.py,sha256=crjOk01weLz5_IdF6dDZWPfSmiKNL8SgnbThyf4Jz2o,1345
21
21
  levelapp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  levelapp/core/base.py,sha256=oh4OkKgwGxmw_jgjX6wrBoK0KPc1JvCMZfbZP_mGmIg,12453
23
- levelapp/core/session.py,sha256=0pE8iUNyA6__RYGJlfaqSr0MtOT0vYIwr496KvC1VlA,7885
23
+ levelapp/core/schemas.py,sha256=UrUnU6h90uqS1LpcFqmMEpgWJ3772ZD5QBIytczmAbE,469
24
+ levelapp/core/session.py,sha256=6bP6s3iWxofWL9LT4qv90VGOntAIa6PBJ_EaWZT0Ur4,7918
24
25
  levelapp/evaluator/__init__.py,sha256=K-P75Q1FXXLCNqH1wyhT9sf4y2R9a1qR5449AXEsY1k,109
25
- levelapp/evaluator/evaluator.py,sha256=JlqszyfOvA8JbgHmQNhMSBkminF95_AjXe14jg_OO7s,9908
26
+ levelapp/evaluator/evaluator.py,sha256=SSveWDIXVg9CTLqexAZJSRpR_wtd5f1bD_s5dG5HJyc,10544
26
27
  levelapp/metrics/__init__.py,sha256=1y4gDLOu2Jz4QVIgPH-v9YMgaWOFr263tYLUTiFJ-fc,1965
27
28
  levelapp/metrics/embedding.py,sha256=wvlT8Q5DjDT6GrAIFtc5aFbA_80hDLUXMP4RbSpSwHE,115
28
29
  levelapp/metrics/exact.py,sha256=Kb13nD2OVLrl3iYHaXrxDfrxDuhW0SMVvLAEXPaJtlY,6235
@@ -30,17 +31,19 @@ levelapp/metrics/fuzzy.py,sha256=Rg8ashzMxtQwKO-z_LLzdj2PDIRqL4CBw6PGRf9IBrI,259
30
31
  levelapp/metrics/token.py,sha256=yQi9hxT_fXTGjLiCCemDxQ4Uk2zD-wQYtSnDlI2AuuY,3521
31
32
  levelapp/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
33
  levelapp/repository/__init__.py,sha256=hNmFRZ7kKJN1mMlOHeW9xf0j9Q7gqTXYJ3hMCzk9to4,79
33
- levelapp/repository/firestore.py,sha256=6HuBCgcBBg-E8QHr06SVDXKY0QBhsWPOhmO7Ps3BWMs,10060
34
+ levelapp/repository/firestore.py,sha256=K9JgxsNCelAKtzTDv19c1dHRlitMeRzo7H3caTlKuF8,10369
34
35
  levelapp/simulator/__init__.py,sha256=8Dz8g7rbpBZX3WoknVmMVoWm_VT72ZL9BABOF1xFpqs,83
35
- levelapp/simulator/schemas.py,sha256=YD6yu3UDXNoApX3R9VpCnpe3sWF7Gs8TNNUgsnqTSXg,4026
36
- levelapp/simulator/simulator.py,sha256=GhoJYcl9GS9vfK58uLwPiI5_cFExFb_Xw_EjElesmvY,17120
36
+ levelapp/simulator/schemas.py,sha256=eEFNNWiAJ8FAIObWcFWF1pL9LmjWwz_2Y-yfY3pHESc,4061
37
+ levelapp/simulator/simulator.py,sha256=RKzpV-yDmtugJ3ScJTTA9MwxTdD-oEkwRZLm8N_scjg,19972
37
38
  levelapp/simulator/utils.py,sha256=qx0JdV1ZDQdTRVKa9xfq278ASrE44GBXSnJZJuhICqo,7365
38
- levelapp/workflow/__init__.py,sha256=X73ulbq3Vk_Vsug1eU2m7LsBCmbR7ehQlwUVCh4Rbb0,142
39
- levelapp/workflow/base.py,sha256=4LFrJyGnuSUJSZ1oOVI_uF1cwEpbYeFFZXHPSh3aEGo,4289
40
- levelapp/workflow/factory.py,sha256=gW5wLv_kJfzrSTQeSLFoUunk87rw0NzW4wmJLIfcwX8,2228
41
- levelapp/workflow/registration.py,sha256=33tPCyzP-XeI1vefjA3PmVdBl7x4wtsVGg05Bh_j7os,363
42
- levelapp/workflow/schemas.py,sha256=Q4TmjqTz_tKzMPXD_fJ2hc7K3IVsn_U56BzHG94WwIA,4090
43
- levelapp-0.1.0.dist-info/METADATA,sha256=6XNgxGHG8oNc-yjg82ZAaGi_UVvrnOalIpUmS5viaW8,10242
44
- levelapp-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
45
- levelapp-0.1.0.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
- levelapp-0.1.0.dist-info/RECORD,,
39
+ levelapp/workflow/__init__.py,sha256=27b2obG7ObhR43yd2uH-R0koRB7-DG8Emnvrq8EjsTA,193
40
+ levelapp/workflow/base.py,sha256=t-vJzwv_OJ9W_pORySJwZq9IENGbWAF3-9-7ozaKDPk,4637
41
+ levelapp/workflow/config.py,sha256=ClQaKSWxj7rFcOEQ4budmgOqMBskg6wAibf_gzqUf1o,2142
42
+ levelapp/workflow/context.py,sha256=DzyZEb8WHug6vWfzf7BIjZAwtmv43HqgbaB20Pw3eWo,2660
43
+ levelapp/workflow/factory.py,sha256=PZHp3AVt61Eop3HwGQDfbO0ju5k7rvNDHKy09eywMTQ,1245
44
+ levelapp/workflow/registration.py,sha256=VHUHjLHXad5kjcKukaEOIf7hBZ09bT3HAzVmIT08aLo,359
45
+ levelapp/workflow/runtime.py,sha256=cFyXNWXSuURKbrMDHdkTcjeItM9wHP-5DPljntwYL5g,686
46
+ levelapp-0.1.1.dist-info/METADATA,sha256=ozbAgnWY4gl149zqzPgYS7-qkKGutJFb9qL0CoYHbh0,12500
47
+ levelapp-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
48
+ levelapp-0.1.1.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ levelapp-0.1.1.dist-info/RECORD,,