genlayer-test 0.9.0__tar.gz → 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/PKG-INFO +49 -176
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/README.md +48 -175
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/genlayer_test.egg-info/PKG-INFO +49 -176
- genlayer_test-0.10.0/gltest/fixtures.py +38 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/config/constants.py +0 -1
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/config/plugin.py +0 -11
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/config/types.py +0 -14
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/config/user.py +0 -16
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/pyproject.toml +1 -1
- genlayer_test-0.9.0/gltest/fixtures.py +0 -87
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/LICENSE +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/genlayer_test.egg-info/SOURCES.txt +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/genlayer_test.egg-info/dependency_links.txt +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/genlayer_test.egg-info/entry_points.txt +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/genlayer_test.egg-info/requires.txt +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/genlayer_test.egg-info/top_level.txt +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/__init__.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/accounts.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/artifacts/__init__.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/artifacts/contract.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/assertions.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/clients.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/contracts/__init__.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/contracts/contract.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/contracts/contract_factory.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/contracts/contract_functions.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/contracts/method_stats.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/contracts/stats_collector.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/contracts/utils.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/exceptions.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/helpers/__init__.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/helpers/fixture_snapshot.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/helpers/take_snapshot.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/logging.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/types.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/utils.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/validators/__init__.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest/validators/validator_factory.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/config/__init__.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/config/general.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/config/pytest_context.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/logging.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/gltest_cli/main.py +0 -0
- {genlayer_test-0.9.0 → genlayer_test-0.10.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: genlayer-test
|
|
3
|
-
Version: 0.9.0
|
|
3
|
+
Version: 0.10.0
|
|
4
4
|
Summary: GenLayer Testing Suite
|
|
5
5
|
Author: GenLayer
|
|
6
6
|
License-Expression: MIT
|
|
@@ -270,31 +270,7 @@ $ gltest --default-wait-interval <default_wait_interval>
|
|
|
270
270
|
$ gltest --default-wait-retries <default_wait_retries>
|
|
271
271
|
```
|
|
272
272
|
|
|
273
|
-
10. Run tests with mocks enabled
|
|
274
|
-
```bash
|
|
275
|
-
$ gltest --test-with-mocks
|
|
276
|
-
```
|
|
277
|
-
The `--test-with-mocks` flag enables mocking of LLM responses when creating validators. This is particularly useful for:
|
|
278
|
-
- Testing without actual LLM API calls
|
|
279
|
-
- Ensuring deterministic test results
|
|
280
|
-
- Faster test execution
|
|
281
|
-
- Testing specific edge cases with controlled responses
|
|
282
|
-
|
|
283
|
-
When using this flag with the `setup_validators` fixture, you can provide custom mock responses:
|
|
284
|
-
```python
|
|
285
|
-
def test_with_mocked_llm(setup_validators):
|
|
286
|
-
# Setup validators with a specific mock response
|
|
287
|
-
mock_response = {"result": "This is a mocked LLM response"}
|
|
288
|
-
setup_validators(mock_response=mock_response)
|
|
289
|
-
|
|
290
|
-
# Your LLM-based contract will receive the mocked response
|
|
291
|
-
contract = factory.deploy()
|
|
292
|
-
result = contract.llm_method() # Will use the mocked response
|
|
293
|
-
```
|
|
294
|
-
|
|
295
|
-
Note: This feature is only available when running tests on localnet.
|
|
296
|
-
|
|
297
|
-
11. Run tests with leader-only mode enabled
|
|
273
|
+
10. Run tests with leader-only mode enabled
|
|
298
274
|
```bash
|
|
299
275
|
$ gltest --leader-only
|
|
300
276
|
```
|
|
@@ -475,7 +451,7 @@ def test_write_methods():
|
|
|
475
451
|
).transact(
|
|
476
452
|
value=0, # Optional: amount of native currency to send
|
|
477
453
|
consensus_max_rotations=3, # Optional: max consensus rotations
|
|
478
|
-
wait_interval=
|
|
454
|
+
wait_interval=1000, # Optional: milliseconds between status checks
|
|
479
455
|
wait_retries=10, # Optional: max number of retries
|
|
480
456
|
transaction_context=None, # Optional: custom transaction context
|
|
481
457
|
)
|
|
@@ -538,7 +514,6 @@ The following fixtures are available in `gltest.fixtures`:
|
|
|
538
514
|
- **`gl_client`** (session scope) - GenLayer client instance for network operations
|
|
539
515
|
- **`default_account`** (session scope) - Default account for testing and deployments
|
|
540
516
|
- **`accounts`** (session scope) - List of test accounts for multi-account scenarios
|
|
541
|
-
- **`setup_validators`** (function scope) - Function to create test validators for LLM operations
|
|
542
517
|
|
|
543
518
|
##### 1. `gl_client` (session scope)
|
|
544
519
|
Provides a GenLayer PY client instance that's created once per test session. This is useful for operations that interact directly with the GenLayer network.
|
|
@@ -573,26 +548,6 @@ def test_multiple_accounts(accounts):
|
|
|
573
548
|
contract.transfer(args=[receiver.address, 100], account=sender)
|
|
574
549
|
```
|
|
575
550
|
|
|
576
|
-
##### 4. `setup_validators` (function scope)
|
|
577
|
-
Creates test validators for localnet environment. This fixture is particularly useful for testing LLM-based contract methods and consensus behavior. It yields a function that allows you to configure validators with custom settings.
|
|
578
|
-
|
|
579
|
-
```python
|
|
580
|
-
def test_with_validators(setup_validators):
|
|
581
|
-
# Setup validators with default configuration
|
|
582
|
-
setup_validators()
|
|
583
|
-
|
|
584
|
-
# Or setup with custom mock responses for testing
|
|
585
|
-
mock_response = {"result": "mocked LLM response"}
|
|
586
|
-
setup_validators(mock_response=mock_response, n_validators=3)
|
|
587
|
-
|
|
588
|
-
# Now test your LLM-based contract methods
|
|
589
|
-
contract = factory.deploy()
|
|
590
|
-
result = contract.llm_based_method()
|
|
591
|
-
```
|
|
592
|
-
|
|
593
|
-
Parameters for `setup_validators`:
|
|
594
|
-
- `mock_response` (dict, optional): Mock validator response when using `--test-with-mocks` flag
|
|
595
|
-
- `n_validators` (int, optional): Number of validators to create (default: 5)
|
|
596
551
|
|
|
597
552
|
#### Using Fixtures in Your Tests
|
|
598
553
|
|
|
@@ -602,9 +557,7 @@ To use these fixtures, simply import them and include them as parameters in your
|
|
|
602
557
|
from gltest import get_contract_factory
|
|
603
558
|
from gltest.assertions import tx_execution_succeeded
|
|
604
559
|
|
|
605
|
-
def test_complete_workflow(gl_client, default_account, accounts, setup_validators):
|
|
606
|
-
# Setup validators for LLM operations
|
|
607
|
-
setup_validators()
|
|
560
|
+
def test_complete_workflow(gl_client, default_account, accounts):
|
|
608
561
|
|
|
609
562
|
# Deploy contract with default account
|
|
610
563
|
factory = get_contract_factory("MyContract")
|
|
@@ -650,7 +603,7 @@ def test_analyze_method():
|
|
|
650
603
|
print(f"Reliability score: {analysis.reliability_score:.2f}%")
|
|
651
604
|
print(f"Unique states: {analysis.unique_states}")
|
|
652
605
|
print(f"Execution time: {analysis.execution_time:.1f}s")
|
|
653
|
-
|
|
606
|
+
|
|
654
607
|
# The analysis returns a MethodStatsSummary object with:
|
|
655
608
|
# - method: The contract method name
|
|
656
609
|
# - args: Arguments passed to the method
|
|
@@ -668,127 +621,6 @@ The `.analyze()` method helps you:
|
|
|
668
621
|
- Identify edge cases and failure patterns
|
|
669
622
|
- Benchmark performance across multiple runs
|
|
670
623
|
|
|
671
|
-
### Mock LLM Responses
|
|
672
|
-
|
|
673
|
-
The Mock LLM system allows you to simulate Large Language Model responses in GenLayer tests. This is essential for creating deterministic tests by providing predefined responses instead of relying on actual LLM calls.
|
|
674
|
-
|
|
675
|
-
#### Basic Structure
|
|
676
|
-
|
|
677
|
-
The mock system consists of a response dictionary that maps GenLayer methods to their mocked responses:
|
|
678
|
-
|
|
679
|
-
```python
|
|
680
|
-
mock_response = {
|
|
681
|
-
"response": {}, # Optional: mocks gl.nondet.exec_prompt
|
|
682
|
-
"eq_principle_prompt_comparative": {}, # Optional: mocks gl.eq_principle.prompt_comparative
|
|
683
|
-
"eq_principle_prompt_non_comparative": {} # Optional: mocks gl.eq_principle.prompt_non_comparative
|
|
684
|
-
}
|
|
685
|
-
|
|
686
|
-
setup_validators(mock_response)
|
|
687
|
-
```
|
|
688
|
-
|
|
689
|
-
#### Method Mappings
|
|
690
|
-
|
|
691
|
-
| Mock Key | GenLayer Method |
|
|
692
|
-
|----------|----------------|
|
|
693
|
-
| `"response"` | `gl.nondet.exec_prompt` |
|
|
694
|
-
| `"eq_principle_prompt_comparative"` | `gl.eq_principle.prompt_comparative` |
|
|
695
|
-
| `"eq_principle_prompt_non_comparative"` | `gl.eq_principle.prompt_non_comparative` |
|
|
696
|
-
|
|
697
|
-
#### How It Works
|
|
698
|
-
|
|
699
|
-
The mock system works by pattern matching against the user message that gets built internally. When a GenLayer method is called:
|
|
700
|
-
|
|
701
|
-
1. A user message is constructed internally (`<user_message>`)
|
|
702
|
-
2. The mock system searches for strings within that message
|
|
703
|
-
3. If a matching string is found in the mock dictionary, the associated response is returned
|
|
704
|
-
|
|
705
|
-
##### String Matching Rules
|
|
706
|
-
|
|
707
|
-
The system performs **substring matching** on the user message. The key in your mock dictionary must be contained within the actual user message.
|
|
708
|
-
|
|
709
|
-
#### Examples
|
|
710
|
-
|
|
711
|
-
##### Basic Example
|
|
712
|
-
|
|
713
|
-
```python
|
|
714
|
-
# Mock setup
|
|
715
|
-
mock_response = {
|
|
716
|
-
"eq_principle_prompt_comparative": {
|
|
717
|
-
"The value of give_coin has to match": True
|
|
718
|
-
}
|
|
719
|
-
}
|
|
720
|
-
setup_validators(mock_response)
|
|
721
|
-
|
|
722
|
-
# In your contract
|
|
723
|
-
result = gl.eq_principle.prompt_comparative(
|
|
724
|
-
get_wizard_answer,
|
|
725
|
-
"The value of give_coin has to match" # This string will be matched
|
|
726
|
-
)
|
|
727
|
-
# result will be True
|
|
728
|
-
```
|
|
729
|
-
|
|
730
|
-
##### Substring Matching Examples
|
|
731
|
-
|
|
732
|
-
✅ **Will work** - Partial match:
|
|
733
|
-
```python
|
|
734
|
-
"eq_principle_prompt_comparative": {
|
|
735
|
-
"The value of give_coin": True # Substring of the full message
|
|
736
|
-
}
|
|
737
|
-
```
|
|
738
|
-
|
|
739
|
-
❌ **Won't work** - Extra words break the match:
|
|
740
|
-
```python
|
|
741
|
-
"eq_principle_prompt_comparative": {
|
|
742
|
-
"The good value of give_coin": True # "good" is not in the actual message
|
|
743
|
-
}
|
|
744
|
-
```
|
|
745
|
-
|
|
746
|
-
##### Complete Example
|
|
747
|
-
|
|
748
|
-
```python
|
|
749
|
-
from gltest import get_contract_factory
|
|
750
|
-
from gltest.fixtures import setup_validators
|
|
751
|
-
|
|
752
|
-
def test_with_mocked_llm(setup_validators):
|
|
753
|
-
# Define mock responses
|
|
754
|
-
mock_response = {
|
|
755
|
-
"response": {
|
|
756
|
-
"What is the weather?": "It's sunny today",
|
|
757
|
-
"Calculate 2+2": "4"
|
|
758
|
-
},
|
|
759
|
-
"eq_principle_prompt_comparative": {
|
|
760
|
-
"values must be equal": True,
|
|
761
|
-
"amounts should match": False
|
|
762
|
-
},
|
|
763
|
-
"eq_principle_prompt_non_comparative": {
|
|
764
|
-
"Is this valid?": True
|
|
765
|
-
}
|
|
766
|
-
}
|
|
767
|
-
|
|
768
|
-
# Initialize the mock system
|
|
769
|
-
setup_validators(mock_response)
|
|
770
|
-
|
|
771
|
-
# Deploy and test your contract
|
|
772
|
-
factory = get_contract_factory("MyLLMContract")
|
|
773
|
-
contract = factory.deploy()
|
|
774
|
-
|
|
775
|
-
# Your LLM methods will use the mocked responses
|
|
776
|
-
result = contract.check_weather() # Uses mocked response
|
|
777
|
-
```
|
|
778
|
-
|
|
779
|
-
#### Best Practices
|
|
780
|
-
|
|
781
|
-
1. **Be specific with match strings**: Use unique substrings that won't accidentally match other prompts
|
|
782
|
-
2. **Test your matches**: Verify that your mock strings actually appear in the generated user messages
|
|
783
|
-
3. **Keep mocks simple**: Mock responses should be minimal and focused on the test case
|
|
784
|
-
4. **Document your mocks**: Comment why specific responses are mocked for future reference
|
|
785
|
-
5. **Use with `--test-with-mocks` flag**: Enable mocking when running tests: `gltest --test-with-mocks`
|
|
786
|
-
|
|
787
|
-
#### Notes
|
|
788
|
-
|
|
789
|
-
- Mock responses are only available when running tests on localnet
|
|
790
|
-
- The `setup_validators` fixture handles the mock setup when provided with a mock_response
|
|
791
|
-
- Mocking is particularly useful for CI/CD pipelines where deterministic results are required
|
|
792
624
|
|
|
793
625
|
### Custom Transaction Context
|
|
794
626
|
|
|
@@ -839,17 +671,58 @@ def test_with_custom_transaction_context():
|
|
|
839
671
|
)
|
|
840
672
|
```
|
|
841
673
|
|
|
674
|
+
### Mock LLM Responses
|
|
675
|
+
|
|
676
|
+
The Mock LLM system allows you to simulate Large Language Model responses in GenLayer tests. This is essential for creating deterministic tests by providing predefined responses instead of relying on actual LLM calls.
|
|
677
|
+
|
|
678
|
+
#### Basic Structure
|
|
679
|
+
|
|
680
|
+
The mock system consists of a response dictionary that maps GenLayer methods to their mocked responses:
|
|
681
|
+
|
|
682
|
+
```python
|
|
683
|
+
from gltest.types import MockedLLMResponse
|
|
684
|
+
|
|
685
|
+
mock_response: MockedLLMResponse = {
|
|
686
|
+
"nondet_exec_prompt": {}, # Optional: mocks gl.nondet.exec_prompt
|
|
687
|
+
"eq_principle_prompt_comparative": {}, # Optional: mocks gl.eq_principle.prompt_comparative
|
|
688
|
+
"eq_principle_prompt_non_comparative": {} # Optional: mocks gl.eq_principle.prompt_non_comparative
|
|
689
|
+
}
|
|
690
|
+
```
|
|
691
|
+
|
|
692
|
+
#### Method Mappings
|
|
693
|
+
|
|
694
|
+
| Mock Key | GenLayer Method |
|
|
695
|
+
|----------|----------------|
|
|
696
|
+
| `"nondet_exec_prompt"` | `gl.nondet.exec_prompt` |
|
|
697
|
+
| `"eq_principle_prompt_comparative"` | `gl.eq_principle.prompt_comparative` |
|
|
698
|
+
| `"eq_principle_prompt_non_comparative"` | `gl.eq_principle.prompt_non_comparative` |
|
|
699
|
+
|
|
700
|
+
#### How It Works
|
|
701
|
+
|
|
702
|
+
The mock system works by pattern matching against the user message that gets built internally. When a GenLayer method is called:
|
|
703
|
+
|
|
704
|
+
1. A user message is constructed internally (`<user_message>`)
|
|
705
|
+
2. The mock system searches for strings within that message
|
|
706
|
+
3. If a matching string is found in the mock dictionary, the associated response is returned
|
|
707
|
+
|
|
708
|
+
##### String Matching Rules
|
|
709
|
+
|
|
710
|
+
The system performs **substring matching** on the user message. The key in your mock dictionary must be contained within the actual user message.
|
|
711
|
+
|
|
712
|
+
|
|
842
713
|
#### Mock Validators with Transaction Context
|
|
843
714
|
|
|
844
715
|
Combine mock validators with custom datetime for fully deterministic tests:
|
|
845
716
|
|
|
846
717
|
```python
|
|
718
|
+
from gltest.types import MockedLLMResponse
|
|
719
|
+
|
|
847
720
|
def test_with_mocked_context():
|
|
848
721
|
factory = get_contract_factory("LLMContract")
|
|
849
722
|
validator_factory = get_validator_factory()
|
|
850
723
|
|
|
851
724
|
# Define mock LLM responses
|
|
852
|
-
mock_response = {
|
|
725
|
+
mock_response: MockedLLMResponse = {
|
|
853
726
|
"nondet_exec_prompt": {
|
|
854
727
|
"analyze this": "positive sentiment"
|
|
855
728
|
},
|
|
@@ -1041,7 +914,7 @@ def test_validator_cloning():
|
|
|
1041
914
|
tx_receipt = contract.set_value(
|
|
1042
915
|
args=["new_value"],
|
|
1043
916
|
).transact(
|
|
1044
|
-
wait_interval=
|
|
917
|
+
wait_interval=2000, # Increase wait interval between status checks
|
|
1045
918
|
wait_retries=20, # Increase number of retry attempts
|
|
1046
919
|
)
|
|
1047
920
|
```
|
|
@@ -1058,7 +931,7 @@ def test_validator_cloning():
|
|
|
1058
931
|
# For critical operations, use more conservative settings
|
|
1059
932
|
contract = factory.deploy(
|
|
1060
933
|
consensus_max_rotations=10, # More rotations for better reliability
|
|
1061
|
-
wait_interval=
|
|
934
|
+
wait_interval=3000, # Longer wait between checks
|
|
1062
935
|
wait_retries=30 # More retries for consensus
|
|
1063
936
|
)
|
|
1064
937
|
```
|
|
@@ -247,31 +247,7 @@ $ gltest --default-wait-interval <default_wait_interval>
|
|
|
247
247
|
$ gltest --default-wait-retries <default_wait_retries>
|
|
248
248
|
```
|
|
249
249
|
|
|
250
|
-
10. Run tests with mocks enabled
|
|
251
|
-
```bash
|
|
252
|
-
$ gltest --test-with-mocks
|
|
253
|
-
```
|
|
254
|
-
The `--test-with-mocks` flag enables mocking of LLM responses when creating validators. This is particularly useful for:
|
|
255
|
-
- Testing without actual LLM API calls
|
|
256
|
-
- Ensuring deterministic test results
|
|
257
|
-
- Faster test execution
|
|
258
|
-
- Testing specific edge cases with controlled responses
|
|
259
|
-
|
|
260
|
-
When using this flag with the `setup_validators` fixture, you can provide custom mock responses:
|
|
261
|
-
```python
|
|
262
|
-
def test_with_mocked_llm(setup_validators):
|
|
263
|
-
# Setup validators with a specific mock response
|
|
264
|
-
mock_response = {"result": "This is a mocked LLM response"}
|
|
265
|
-
setup_validators(mock_response=mock_response)
|
|
266
|
-
|
|
267
|
-
# Your LLM-based contract will receive the mocked response
|
|
268
|
-
contract = factory.deploy()
|
|
269
|
-
result = contract.llm_method() # Will use the mocked response
|
|
270
|
-
```
|
|
271
|
-
|
|
272
|
-
Note: This feature is only available when running tests on localnet.
|
|
273
|
-
|
|
274
|
-
11. Run tests with leader-only mode enabled
|
|
250
|
+
10. Run tests with leader-only mode enabled
|
|
275
251
|
```bash
|
|
276
252
|
$ gltest --leader-only
|
|
277
253
|
```
|
|
@@ -452,7 +428,7 @@ def test_write_methods():
|
|
|
452
428
|
).transact(
|
|
453
429
|
value=0, # Optional: amount of native currency to send
|
|
454
430
|
consensus_max_rotations=3, # Optional: max consensus rotations
|
|
455
|
-
wait_interval=
|
|
431
|
+
wait_interval=1000, # Optional: milliseconds between status checks
|
|
456
432
|
wait_retries=10, # Optional: max number of retries
|
|
457
433
|
transaction_context=None, # Optional: custom transaction context
|
|
458
434
|
)
|
|
@@ -515,7 +491,6 @@ The following fixtures are available in `gltest.fixtures`:
|
|
|
515
491
|
- **`gl_client`** (session scope) - GenLayer client instance for network operations
|
|
516
492
|
- **`default_account`** (session scope) - Default account for testing and deployments
|
|
517
493
|
- **`accounts`** (session scope) - List of test accounts for multi-account scenarios
|
|
518
|
-
- **`setup_validators`** (function scope) - Function to create test validators for LLM operations
|
|
519
494
|
|
|
520
495
|
##### 1. `gl_client` (session scope)
|
|
521
496
|
Provides a GenLayer PY client instance that's created once per test session. This is useful for operations that interact directly with the GenLayer network.
|
|
@@ -550,26 +525,6 @@ def test_multiple_accounts(accounts):
|
|
|
550
525
|
contract.transfer(args=[receiver.address, 100], account=sender)
|
|
551
526
|
```
|
|
552
527
|
|
|
553
|
-
##### 4. `setup_validators` (function scope)
|
|
554
|
-
Creates test validators for localnet environment. This fixture is particularly useful for testing LLM-based contract methods and consensus behavior. It yields a function that allows you to configure validators with custom settings.
|
|
555
|
-
|
|
556
|
-
```python
|
|
557
|
-
def test_with_validators(setup_validators):
|
|
558
|
-
# Setup validators with default configuration
|
|
559
|
-
setup_validators()
|
|
560
|
-
|
|
561
|
-
# Or setup with custom mock responses for testing
|
|
562
|
-
mock_response = {"result": "mocked LLM response"}
|
|
563
|
-
setup_validators(mock_response=mock_response, n_validators=3)
|
|
564
|
-
|
|
565
|
-
# Now test your LLM-based contract methods
|
|
566
|
-
contract = factory.deploy()
|
|
567
|
-
result = contract.llm_based_method()
|
|
568
|
-
```
|
|
569
|
-
|
|
570
|
-
Parameters for `setup_validators`:
|
|
571
|
-
- `mock_response` (dict, optional): Mock validator response when using `--test-with-mocks` flag
|
|
572
|
-
- `n_validators` (int, optional): Number of validators to create (default: 5)
|
|
573
528
|
|
|
574
529
|
#### Using Fixtures in Your Tests
|
|
575
530
|
|
|
@@ -579,9 +534,7 @@ To use these fixtures, simply import them and include them as parameters in your
|
|
|
579
534
|
from gltest import get_contract_factory
|
|
580
535
|
from gltest.assertions import tx_execution_succeeded
|
|
581
536
|
|
|
582
|
-
def test_complete_workflow(gl_client, default_account, accounts, setup_validators):
|
|
583
|
-
# Setup validators for LLM operations
|
|
584
|
-
setup_validators()
|
|
537
|
+
def test_complete_workflow(gl_client, default_account, accounts):
|
|
585
538
|
|
|
586
539
|
# Deploy contract with default account
|
|
587
540
|
factory = get_contract_factory("MyContract")
|
|
@@ -627,7 +580,7 @@ def test_analyze_method():
|
|
|
627
580
|
print(f"Reliability score: {analysis.reliability_score:.2f}%")
|
|
628
581
|
print(f"Unique states: {analysis.unique_states}")
|
|
629
582
|
print(f"Execution time: {analysis.execution_time:.1f}s")
|
|
630
|
-
|
|
583
|
+
|
|
631
584
|
# The analysis returns a MethodStatsSummary object with:
|
|
632
585
|
# - method: The contract method name
|
|
633
586
|
# - args: Arguments passed to the method
|
|
@@ -645,127 +598,6 @@ The `.analyze()` method helps you:
|
|
|
645
598
|
- Identify edge cases and failure patterns
|
|
646
599
|
- Benchmark performance across multiple runs
|
|
647
600
|
|
|
648
|
-
### Mock LLM Responses
|
|
649
|
-
|
|
650
|
-
The Mock LLM system allows you to simulate Large Language Model responses in GenLayer tests. This is essential for creating deterministic tests by providing predefined responses instead of relying on actual LLM calls.
|
|
651
|
-
|
|
652
|
-
#### Basic Structure
|
|
653
|
-
|
|
654
|
-
The mock system consists of a response dictionary that maps GenLayer methods to their mocked responses:
|
|
655
|
-
|
|
656
|
-
```python
|
|
657
|
-
mock_response = {
|
|
658
|
-
"response": {}, # Optional: mocks gl.nondet.exec_prompt
|
|
659
|
-
"eq_principle_prompt_comparative": {}, # Optional: mocks gl.eq_principle.prompt_comparative
|
|
660
|
-
"eq_principle_prompt_non_comparative": {} # Optional: mocks gl.eq_principle.prompt_non_comparative
|
|
661
|
-
}
|
|
662
|
-
|
|
663
|
-
setup_validators(mock_response)
|
|
664
|
-
```
|
|
665
|
-
|
|
666
|
-
#### Method Mappings
|
|
667
|
-
|
|
668
|
-
| Mock Key | GenLayer Method |
|
|
669
|
-
|----------|----------------|
|
|
670
|
-
| `"response"` | `gl.nondet.exec_prompt` |
|
|
671
|
-
| `"eq_principle_prompt_comparative"` | `gl.eq_principle.prompt_comparative` |
|
|
672
|
-
| `"eq_principle_prompt_non_comparative"` | `gl.eq_principle.prompt_non_comparative` |
|
|
673
|
-
|
|
674
|
-
#### How It Works
|
|
675
|
-
|
|
676
|
-
The mock system works by pattern matching against the user message that gets built internally. When a GenLayer method is called:
|
|
677
|
-
|
|
678
|
-
1. A user message is constructed internally (`<user_message>`)
|
|
679
|
-
2. The mock system searches for strings within that message
|
|
680
|
-
3. If a matching string is found in the mock dictionary, the associated response is returned
|
|
681
|
-
|
|
682
|
-
##### String Matching Rules
|
|
683
|
-
|
|
684
|
-
The system performs **substring matching** on the user message. The key in your mock dictionary must be contained within the actual user message.
|
|
685
|
-
|
|
686
|
-
#### Examples
|
|
687
|
-
|
|
688
|
-
##### Basic Example
|
|
689
|
-
|
|
690
|
-
```python
|
|
691
|
-
# Mock setup
|
|
692
|
-
mock_response = {
|
|
693
|
-
"eq_principle_prompt_comparative": {
|
|
694
|
-
"The value of give_coin has to match": True
|
|
695
|
-
}
|
|
696
|
-
}
|
|
697
|
-
setup_validators(mock_response)
|
|
698
|
-
|
|
699
|
-
# In your contract
|
|
700
|
-
result = gl.eq_principle.prompt_comparative(
|
|
701
|
-
get_wizard_answer,
|
|
702
|
-
"The value of give_coin has to match" # This string will be matched
|
|
703
|
-
)
|
|
704
|
-
# result will be True
|
|
705
|
-
```
|
|
706
|
-
|
|
707
|
-
##### Substring Matching Examples
|
|
708
|
-
|
|
709
|
-
✅ **Will work** - Partial match:
|
|
710
|
-
```python
|
|
711
|
-
"eq_principle_prompt_comparative": {
|
|
712
|
-
"The value of give_coin": True # Substring of the full message
|
|
713
|
-
}
|
|
714
|
-
```
|
|
715
|
-
|
|
716
|
-
❌ **Won't work** - Extra words break the match:
|
|
717
|
-
```python
|
|
718
|
-
"eq_principle_prompt_comparative": {
|
|
719
|
-
"The good value of give_coin": True # "good" is not in the actual message
|
|
720
|
-
}
|
|
721
|
-
```
|
|
722
|
-
|
|
723
|
-
##### Complete Example
|
|
724
|
-
|
|
725
|
-
```python
|
|
726
|
-
from gltest import get_contract_factory
|
|
727
|
-
from gltest.fixtures import setup_validators
|
|
728
|
-
|
|
729
|
-
def test_with_mocked_llm(setup_validators):
|
|
730
|
-
# Define mock responses
|
|
731
|
-
mock_response = {
|
|
732
|
-
"response": {
|
|
733
|
-
"What is the weather?": "It's sunny today",
|
|
734
|
-
"Calculate 2+2": "4"
|
|
735
|
-
},
|
|
736
|
-
"eq_principle_prompt_comparative": {
|
|
737
|
-
"values must be equal": True,
|
|
738
|
-
"amounts should match": False
|
|
739
|
-
},
|
|
740
|
-
"eq_principle_prompt_non_comparative": {
|
|
741
|
-
"Is this valid?": True
|
|
742
|
-
}
|
|
743
|
-
}
|
|
744
|
-
|
|
745
|
-
# Initialize the mock system
|
|
746
|
-
setup_validators(mock_response)
|
|
747
|
-
|
|
748
|
-
# Deploy and test your contract
|
|
749
|
-
factory = get_contract_factory("MyLLMContract")
|
|
750
|
-
contract = factory.deploy()
|
|
751
|
-
|
|
752
|
-
# Your LLM methods will use the mocked responses
|
|
753
|
-
result = contract.check_weather() # Uses mocked response
|
|
754
|
-
```
|
|
755
|
-
|
|
756
|
-
#### Best Practices
|
|
757
|
-
|
|
758
|
-
1. **Be specific with match strings**: Use unique substrings that won't accidentally match other prompts
|
|
759
|
-
2. **Test your matches**: Verify that your mock strings actually appear in the generated user messages
|
|
760
|
-
3. **Keep mocks simple**: Mock responses should be minimal and focused on the test case
|
|
761
|
-
4. **Document your mocks**: Comment why specific responses are mocked for future reference
|
|
762
|
-
5. **Use with `--test-with-mocks` flag**: Enable mocking when running tests: `gltest --test-with-mocks`
|
|
763
|
-
|
|
764
|
-
#### Notes
|
|
765
|
-
|
|
766
|
-
- Mock responses are only available when running tests on localnet
|
|
767
|
-
- The `setup_validators` fixture handles the mock setup when provided with a mock_response
|
|
768
|
-
- Mocking is particularly useful for CI/CD pipelines where deterministic results are required
|
|
769
601
|
|
|
770
602
|
### Custom Transaction Context
|
|
771
603
|
|
|
@@ -816,17 +648,58 @@ def test_with_custom_transaction_context():
|
|
|
816
648
|
)
|
|
817
649
|
```
|
|
818
650
|
|
|
651
|
+
### Mock LLM Responses
|
|
652
|
+
|
|
653
|
+
The Mock LLM system allows you to simulate Large Language Model responses in GenLayer tests. This is essential for creating deterministic tests by providing predefined responses instead of relying on actual LLM calls.
|
|
654
|
+
|
|
655
|
+
#### Basic Structure
|
|
656
|
+
|
|
657
|
+
The mock system consists of a response dictionary that maps GenLayer methods to their mocked responses:
|
|
658
|
+
|
|
659
|
+
```python
|
|
660
|
+
from gltest.types import MockedLLMResponse
|
|
661
|
+
|
|
662
|
+
mock_response: MockedLLMResponse = {
|
|
663
|
+
"nondet_exec_prompt": {}, # Optional: mocks gl.nondet.exec_prompt
|
|
664
|
+
"eq_principle_prompt_comparative": {}, # Optional: mocks gl.eq_principle.prompt_comparative
|
|
665
|
+
"eq_principle_prompt_non_comparative": {} # Optional: mocks gl.eq_principle.prompt_non_comparative
|
|
666
|
+
}
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
#### Method Mappings
|
|
670
|
+
|
|
671
|
+
| Mock Key | GenLayer Method |
|
|
672
|
+
|----------|----------------|
|
|
673
|
+
| `"nondet_exec_prompt"` | `gl.nondet.exec_prompt` |
|
|
674
|
+
| `"eq_principle_prompt_comparative"` | `gl.eq_principle.prompt_comparative` |
|
|
675
|
+
| `"eq_principle_prompt_non_comparative"` | `gl.eq_principle.prompt_non_comparative` |
|
|
676
|
+
|
|
677
|
+
#### How It Works
|
|
678
|
+
|
|
679
|
+
The mock system works by pattern matching against the user message that gets built internally. When a GenLayer method is called:
|
|
680
|
+
|
|
681
|
+
1. A user message is constructed internally (`<user_message>`)
|
|
682
|
+
2. The mock system searches for strings within that message
|
|
683
|
+
3. If a matching string is found in the mock dictionary, the associated response is returned
|
|
684
|
+
|
|
685
|
+
##### String Matching Rules
|
|
686
|
+
|
|
687
|
+
The system performs **substring matching** on the user message. The key in your mock dictionary must be contained within the actual user message.
|
|
688
|
+
|
|
689
|
+
|
|
819
690
|
#### Mock Validators with Transaction Context
|
|
820
691
|
|
|
821
692
|
Combine mock validators with custom datetime for fully deterministic tests:
|
|
822
693
|
|
|
823
694
|
```python
|
|
695
|
+
from gltest.types import MockedLLMResponse
|
|
696
|
+
|
|
824
697
|
def test_with_mocked_context():
|
|
825
698
|
factory = get_contract_factory("LLMContract")
|
|
826
699
|
validator_factory = get_validator_factory()
|
|
827
700
|
|
|
828
701
|
# Define mock LLM responses
|
|
829
|
-
mock_response = {
|
|
702
|
+
mock_response: MockedLLMResponse = {
|
|
830
703
|
"nondet_exec_prompt": {
|
|
831
704
|
"analyze this": "positive sentiment"
|
|
832
705
|
},
|
|
@@ -1018,7 +891,7 @@ def test_validator_cloning():
|
|
|
1018
891
|
tx_receipt = contract.set_value(
|
|
1019
892
|
args=["new_value"],
|
|
1020
893
|
).transact(
|
|
1021
|
-
wait_interval=
|
|
894
|
+
wait_interval=2000, # Increase wait interval between status checks
|
|
1022
895
|
wait_retries=20, # Increase number of retry attempts
|
|
1023
896
|
)
|
|
1024
897
|
```
|
|
@@ -1035,7 +908,7 @@ def test_validator_cloning():
|
|
|
1035
908
|
# For critical operations, use more conservative settings
|
|
1036
909
|
contract = factory.deploy(
|
|
1037
910
|
consensus_max_rotations=10, # More rotations for better reliability
|
|
1038
|
-
wait_interval=
|
|
911
|
+
wait_interval=3000, # Longer wait between checks
|
|
1039
912
|
wait_retries=30 # More retries for consensus
|
|
1040
913
|
)
|
|
1041
914
|
```
|