runsight-core 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runsight_core-1.1.0/PKG-INFO +18 -0
- runsight_core-1.1.0/README.md +6 -0
- runsight_core-1.1.0/pyproject.toml +45 -0
- runsight_core-1.1.0/setup.cfg +4 -0
- runsight_core-1.1.0/src/runsight_core/__init__.py +44 -0
- runsight_core-1.1.0/src/runsight_core/artifacts.py +66 -0
- runsight_core-1.1.0/src/runsight_core/assertions/__init__.py +25 -0
- runsight_core-1.1.0/src/runsight_core/assertions/base.py +57 -0
- runsight_core-1.1.0/src/runsight_core/assertions/deterministic/__init__.py +65 -0
- runsight_core-1.1.0/src/runsight_core/assertions/deterministic/linguistic.py +129 -0
- runsight_core-1.1.0/src/runsight_core/assertions/deterministic/performance.py +58 -0
- runsight_core-1.1.0/src/runsight_core/assertions/deterministic/string.py +212 -0
- runsight_core-1.1.0/src/runsight_core/assertions/deterministic/structural.py +91 -0
- runsight_core-1.1.0/src/runsight_core/assertions/registry.py +159 -0
- runsight_core-1.1.0/src/runsight_core/assertions/scoring.py +42 -0
- runsight_core-1.1.0/src/runsight_core/blocks/__init__.py +28 -0
- runsight_core-1.1.0/src/runsight_core/blocks/_helpers.py +45 -0
- runsight_core-1.1.0/src/runsight_core/blocks/_registry.py +47 -0
- runsight_core-1.1.0/src/runsight_core/blocks/base.py +120 -0
- runsight_core-1.1.0/src/runsight_core/blocks/code.py +349 -0
- runsight_core-1.1.0/src/runsight_core/blocks/dispatch.py +238 -0
- runsight_core-1.1.0/src/runsight_core/blocks/gate.py +194 -0
- runsight_core-1.1.0/src/runsight_core/blocks/linear.py +122 -0
- runsight_core-1.1.0/src/runsight_core/blocks/loop.py +316 -0
- runsight_core-1.1.0/src/runsight_core/blocks/registry.py +48 -0
- runsight_core-1.1.0/src/runsight_core/blocks/synthesize.py +134 -0
- runsight_core-1.1.0/src/runsight_core/blocks/workflow_block.py +418 -0
- runsight_core-1.1.0/src/runsight_core/budget_enforcement.py +193 -0
- runsight_core-1.1.0/src/runsight_core/conditions/__init__.py +0 -0
- runsight_core-1.1.0/src/runsight_core/conditions/engine.py +315 -0
- runsight_core-1.1.0/src/runsight_core/eval/__init__.py +0 -0
- runsight_core-1.1.0/src/runsight_core/eval/runner.py +149 -0
- runsight_core-1.1.0/src/runsight_core/isolation/__init__.py +28 -0
- runsight_core-1.1.0/src/runsight_core/isolation/credentials.py +42 -0
- runsight_core-1.1.0/src/runsight_core/isolation/envelope.py +88 -0
- runsight_core-1.1.0/src/runsight_core/isolation/errors.py +13 -0
- runsight_core-1.1.0/src/runsight_core/isolation/handlers.py +141 -0
- runsight_core-1.1.0/src/runsight_core/isolation/harness.py +440 -0
- runsight_core-1.1.0/src/runsight_core/isolation/ipc.py +152 -0
- runsight_core-1.1.0/src/runsight_core/isolation/pool.py +21 -0
- runsight_core-1.1.0/src/runsight_core/isolation/worker.py +384 -0
- runsight_core-1.1.0/src/runsight_core/isolation/wrapper.py +227 -0
- runsight_core-1.1.0/src/runsight_core/llm/__init__.py +7 -0
- runsight_core-1.1.0/src/runsight_core/llm/client.py +125 -0
- runsight_core-1.1.0/src/runsight_core/llm/model_catalog.py +137 -0
- runsight_core-1.1.0/src/runsight_core/memory/__init__.py +7 -0
- runsight_core-1.1.0/src/runsight_core/memory/budget.py +389 -0
- runsight_core-1.1.0/src/runsight_core/memory/token_counting.py +13 -0
- runsight_core-1.1.0/src/runsight_core/memory/windowing.py +43 -0
- runsight_core-1.1.0/src/runsight_core/observer.py +539 -0
- runsight_core-1.1.0/src/runsight_core/primitives.py +197 -0
- runsight_core-1.1.0/src/runsight_core/py.typed +0 -0
- runsight_core-1.1.0/src/runsight_core/runner.py +408 -0
- runsight_core-1.1.0/src/runsight_core/security.py +71 -0
- runsight_core-1.1.0/src/runsight_core/state.py +78 -0
- runsight_core-1.1.0/src/runsight_core/tools/__init__.py +17 -0
- runsight_core-1.1.0/src/runsight_core/tools/_catalog.py +483 -0
- runsight_core-1.1.0/src/runsight_core/tools/delegate.py +53 -0
- runsight_core-1.1.0/src/runsight_core/tools/file_io.py +68 -0
- runsight_core-1.1.0/src/runsight_core/tools/http.py +68 -0
- runsight_core-1.1.0/src/runsight_core/workflow.py +867 -0
- runsight_core-1.1.0/src/runsight_core/yaml/__init__.py +20 -0
- runsight_core-1.1.0/src/runsight_core/yaml/discovery.py +450 -0
- runsight_core-1.1.0/src/runsight_core/yaml/parser.py +1032 -0
- runsight_core-1.1.0/src/runsight_core/yaml/registry.py +131 -0
- runsight_core-1.1.0/src/runsight_core/yaml/schema.py +526 -0
- runsight_core-1.1.0/src/runsight_core.egg-info/PKG-INFO +18 -0
- runsight_core-1.1.0/src/runsight_core.egg-info/SOURCES.txt +219 -0
- runsight_core-1.1.0/src/runsight_core.egg-info/dependency_links.txt +1 -0
- runsight_core-1.1.0/src/runsight_core.egg-info/requires.txt +13 -0
- runsight_core-1.1.0/src/runsight_core.egg-info/top_level.txt +1 -0
- runsight_core-1.1.0/tests/test_achat_budget_enforcement.py +419 -0
- runsight_core-1.1.0/tests/test_artifact_store_wiring.py +419 -0
- runsight_core-1.1.0/tests/test_artifacts.py +403 -0
- runsight_core-1.1.0/tests/test_base_block.py +149 -0
- runsight_core-1.1.0/tests/test_baseblock_artifact_helpers.py +351 -0
- runsight_core-1.1.0/tests/test_block_timeout_enforcement.py +194 -0
- runsight_core-1.1.0/tests/test_blocks.py +234 -0
- runsight_core-1.1.0/tests/test_budget_enforcement_types.py +541 -0
- runsight_core-1.1.0/tests/test_budget_limits_schema.py +594 -0
- runsight_core-1.1.0/tests/test_budget_migration_remaining.py +378 -0
- runsight_core-1.1.0/tests/test_budget_models.py +423 -0
- runsight_core-1.1.0/tests/test_budget_session.py +633 -0
- runsight_core-1.1.0/tests/test_budget_wiring.py +528 -0
- runsight_core-1.1.0/tests/test_carry_context_blockresult.py +554 -0
- runsight_core-1.1.0/tests/test_code_block.py +273 -0
- runsight_core-1.1.0/tests/test_codeblock_sandbox_hardening.py +215 -0
- runsight_core-1.1.0/tests/test_composite_observer_isolation.py +176 -0
- runsight_core-1.1.0/tests/test_condition_engine.py +604 -0
- runsight_core-1.1.0/tests/test_context_truncation.py +359 -0
- runsight_core-1.1.0/tests/test_conversation_histories.py +173 -0
- runsight_core-1.1.0/tests/test_cross_feature_integration.py +650 -0
- runsight_core-1.1.0/tests/test_custom_asset_tool_contract.py +59 -0
- runsight_core-1.1.0/tests/test_discovery.py +1010 -0
- runsight_core-1.1.0/tests/test_dispatch_block_stateful.py +1009 -0
- runsight_core-1.1.0/tests/test_dispatch_budget_isolation.py +639 -0
- runsight_core-1.1.0/tests/test_dispatch_exit_def.py +123 -0
- runsight_core-1.1.0/tests/test_dispatch_synthesize_integration.py +829 -0
- runsight_core-1.1.0/tests/test_dispatch_v2.py +1072 -0
- runsight_core-1.1.0/tests/test_e2e_block_timeout.py +489 -0
- runsight_core-1.1.0/tests/test_e2e_cost_cap.py +438 -0
- runsight_core-1.1.0/tests/test_e2e_dispatch_budget.py +484 -0
- runsight_core-1.1.0/tests/test_e2e_warn_and_flow_timeout.py +879 -0
- runsight_core-1.1.0/tests/test_fit_to_budget_phase1.py +449 -0
- runsight_core-1.1.0/tests/test_fit_to_budget_phase2.py +620 -0
- runsight_core-1.1.0/tests/test_gate_error_subclass.py +8 -0
- runsight_core-1.1.0/tests/test_gate_file_writer_blocks.py +168 -0
- runsight_core-1.1.0/tests/test_integration_blocks_workflow.py +395 -0
- runsight_core-1.1.0/tests/test_integration_merge_validation.py +86 -0
- runsight_core-1.1.0/tests/test_integration_runner_primitives.py +290 -0
- runsight_core-1.1.0/tests/test_integration_state_blocks.py +290 -0
- runsight_core-1.1.0/tests/test_integration_workflow.py +422 -0
- runsight_core-1.1.0/tests/test_integration_workflow_block.py +492 -0
- runsight_core-1.1.0/tests/test_integration_workflow_block_backward_compat.py +527 -0
- runsight_core-1.1.0/tests/test_integration_workflow_block_e2e.py +869 -0
- runsight_core-1.1.0/tests/test_integration_workflow_block_parser.py +548 -0
- runsight_core-1.1.0/tests/test_integration_workflow_block_with_other_blocks.py +413 -0
- runsight_core-1.1.0/tests/test_iso_001_envelope_models.py +560 -0
- runsight_core-1.1.0/tests/test_iso_002_ipc_protocol.py +953 -0
- runsight_core-1.1.0/tests/test_iso_003_harness.py +952 -0
- runsight_core-1.1.0/tests/test_iso_004_worker.py +731 -0
- runsight_core-1.1.0/tests/test_iso_005_block_migration.py +1143 -0
- runsight_core-1.1.0/tests/test_iso_006_dispatch_delegate.py +512 -0
- runsight_core-1.1.0/tests/test_iso_007_monitoring.py +673 -0
- runsight_core-1.1.0/tests/test_iso_008_credentials.py +764 -0
- runsight_core-1.1.0/tests/test_linearblock_stateful.py +605 -0
- runsight_core-1.1.0/tests/test_loop_block.py +830 -0
- runsight_core-1.1.0/tests/test_loop_break_conditions.py +873 -0
- runsight_core-1.1.0/tests/test_loop_carry_context.py +1391 -0
- runsight_core-1.1.0/tests/test_loop_exports_schema.py +490 -0
- runsight_core-1.1.0/tests/test_loop_workflow_validation.py +193 -0
- runsight_core-1.1.0/tests/test_loopblock_kwargs_forwarding.py +552 -0
- runsight_core-1.1.0/tests/test_loopblock_stateful_integration.py +855 -0
- runsight_core-1.1.0/tests/test_model_catalog.py +519 -0
- runsight_core-1.1.0/tests/test_observer.py +135 -0
- runsight_core-1.1.0/tests/test_observer_soul_extension.py +202 -0
- runsight_core-1.1.0/tests/test_parser_inputs_outputs.py +884 -0
- runsight_core-1.1.0/tests/test_parser_workflow_block.py +432 -0
- runsight_core-1.1.0/tests/test_primitives.py +24 -0
- runsight_core-1.1.0/tests/test_primitives_extended.py +220 -0
- runsight_core-1.1.0/tests/test_prompt_hash.py +151 -0
- runsight_core-1.1.0/tests/test_registry.py +128 -0
- runsight_core-1.1.0/tests/test_remove_placeholder_block.py +170 -0
- runsight_core-1.1.0/tests/test_retry_config.py +554 -0
- runsight_core-1.1.0/tests/test_retry_execution.py +692 -0
- runsight_core-1.1.0/tests/test_retry_stateful.py +611 -0
- runsight_core-1.1.0/tests/test_retryblock_migration.py +328 -0
- runsight_core-1.1.0/tests/test_run126_code_block_parser_and_achat.py +307 -0
- runsight_core-1.1.0/tests/test_run127_runner_get_client_api_key.py +58 -0
- runsight_core-1.1.0/tests/test_run137_async_subprocess.py +323 -0
- runsight_core-1.1.0/tests/test_run141_multi_provider_keys.py +185 -0
- runsight_core-1.1.0/tests/test_run170_complex_read_sites.py +279 -0
- runsight_core-1.1.0/tests/test_run177_block_result.py +207 -0
- runsight_core-1.1.0/tests/test_run178_write_sites_block_result.py +490 -0
- runsight_core-1.1.0/tests/test_run179_strict_block_result.py +158 -0
- runsight_core-1.1.0/tests/test_run181_read_site_migration.py +310 -0
- runsight_core-1.1.0/tests/test_run219_auto_registration.py +575 -0
- runsight_core-1.1.0/tests/test_run222_migrate_blocks.py +359 -0
- runsight_core-1.1.0/tests/test_run377_yaml_enabled.py +101 -0
- runsight_core-1.1.0/tests/test_run415_no_builtin_souls.py +104 -0
- runsight_core-1.1.0/tests/test_run468_parser_soul_field_forwarding.py +107 -0
- runsight_core-1.1.0/tests/test_run469_discover_soul_fields.py +117 -0
- runsight_core-1.1.0/tests/test_run569_project_root_resolution.py +248 -0
- runsight_core-1.1.0/tests/test_run570_kill_inline_souls.py +278 -0
- runsight_core-1.1.0/tests/test_run571_wire_soul_ref_to_library.py +654 -0
- runsight_core-1.1.0/tests/test_run572_library_soul_tool_governance.py +643 -0
- runsight_core-1.1.0/tests/test_run603_workflow_interface_schema.py +96 -0
- runsight_core-1.1.0/tests/test_run604_interface_execution.py +422 -0
- runsight_core-1.1.0/tests/test_run605_on_error_modes.py +349 -0
- runsight_core-1.1.0/tests/test_run606_runtime_depth_parity.py +330 -0
- runsight_core-1.1.0/tests/test_run614_integration_subworkflow.py +804 -0
- runsight_core-1.1.0/tests/test_run628_noise_cleanup_verification.py +410 -0
- runsight_core-1.1.0/tests/test_run629_dispatch_e2e.py +535 -0
- runsight_core-1.1.0/tests/test_run644_dispatch_runtime_rename.py +96 -0
- runsight_core-1.1.0/tests/test_run645_dispatch_schema_canonicalization.py +81 -0
- runsight_core-1.1.0/tests/test_run663_child_observer_wrapper.py +315 -0
- runsight_core-1.1.0/tests/test_run663_parser_round_trip.py +102 -0
- runsight_core-1.1.0/tests/test_run668_depends_error_routes.py +399 -0
- runsight_core-1.1.0/tests/test_run669_gate_shortcuts.py +222 -0
- runsight_core-1.1.0/tests/test_run670_error_route_runtime.py +261 -0
- runsight_core-1.1.0/tests/test_run671_routes_shorthand.py +573 -0
- runsight_core-1.1.0/tests/test_run675_block_execution_context.py +68 -0
- runsight_core-1.1.0/tests/test_run676_execute_block_extraction.py +234 -0
- runsight_core-1.1.0/tests/test_run677_workflow_run_execute_block_wiring.py +334 -0
- runsight_core-1.1.0/tests/test_run678_loop_execute_block_wiring.py +253 -0
- runsight_core-1.1.0/tests/test_run680_codeblock_exit_handle.py +275 -0
- runsight_core-1.1.0/tests/test_run681_linearblock_exit_conditions.py +418 -0
- runsight_core-1.1.0/tests/test_run682_workflowblock_loopblock_e2e.py +603 -0
- runsight_core-1.1.0/tests/test_run683_nested_loopblock_observer.py +521 -0
- runsight_core-1.1.0/tests/test_run684_exit_handle_all_block_types.py +730 -0
- runsight_core-1.1.0/tests/test_run685_eval_debt_integration.py +573 -0
- runsight_core-1.1.0/tests/test_run688_soul_assertions_cleanup.py +122 -0
- runsight_core-1.1.0/tests/test_run690_delete_duplicate_resolve_soul.py +93 -0
- runsight_core-1.1.0/tests/test_run692_inline_soul_fixture_migration.py +230 -0
- runsight_core-1.1.0/tests/test_run693_step_wrapper_assertions.py +274 -0
- runsight_core-1.1.0/tests/test_run694_eval_yaml_schema.py +434 -0
- runsight_core-1.1.0/tests/test_run695_eval_runner.py +756 -0
- runsight_core-1.1.0/tests/test_run699_eval_integration.py +829 -0
- runsight_core-1.1.0/tests/test_run700_eval_e2e.py +854 -0
- runsight_core-1.1.0/tests/test_run701_state_isolation_verification.py +172 -0
- runsight_core-1.1.0/tests/test_run702_mixed_pipeline_e2e.py +493 -0
- runsight_core-1.1.0/tests/test_run703_dispatch_in_loop_e2e.py +790 -0
- runsight_core-1.1.0/tests/test_run704_error_route_output_mapping_e2e.py +720 -0
- runsight_core-1.1.0/tests/test_runner.py +247 -0
- runsight_core-1.1.0/tests/test_runner_messages.py +112 -0
- runsight_core-1.1.0/tests/test_sandbox_hardening.py +541 -0
- runsight_core-1.1.0/tests/test_schema.py +145 -0
- runsight_core-1.1.0/tests/test_schema_validation.py +463 -0
- runsight_core-1.1.0/tests/test_state.py +103 -0
- runsight_core-1.1.0/tests/test_tool_integration.py +2225 -0
- runsight_core-1.1.0/tests/test_tool_registry.py +413 -0
- runsight_core-1.1.0/tests/test_windowing.py +248 -0
- runsight_core-1.1.0/tests/test_workflow.py +818 -0
- runsight_core-1.1.0/tests/test_workflow_block_execute.py +335 -0
- runsight_core-1.1.0/tests/test_workflow_block_recursion.py +141 -0
- runsight_core-1.1.0/tests/test_workflow_defensive_observer.py +237 -0
- runsight_core-1.1.0/tests/test_workflow_output_conditions.py +819 -0
- runsight_core-1.1.0/tests/test_yaml_assertions_config.py +191 -0
- runsight_core-1.1.0/tests/test_yaml_dx_e2e.py +449 -0
- runsight_core-1.1.0/tests/test_yaml_dx_sugar.py +868 -0
- runsight_core-1.1.0/tests/test_yaml_parser.py +834 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: runsight-core
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Runsight Agent OS Core Engine
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: pydantic>=2.0
|
|
8
|
+
Requires-Dist: litellm>=1.0.0
|
|
9
|
+
Requires-Dist: openai>=1.0.0
|
|
10
|
+
Requires-Dist: httpx>=0.27
|
|
11
|
+
Requires-Dist: jsonschema>=4.0
|
|
12
|
+
Requires-Dist: editdistance>=0.6
|
|
13
|
+
Requires-Dist: rouge-score>=0.1
|
|
14
|
+
Requires-Dist: jsonpath-ng>=1.6
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
17
|
+
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
18
|
+
Requires-Dist: respx>=0.22; extra == "dev"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=75.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "runsight-core"
|
|
7
|
+
version = "1.1.0"
|
|
8
|
+
description = "Runsight Agent OS Core Engine"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"pydantic>=2.0",
|
|
13
|
+
"litellm>=1.0.0",
|
|
14
|
+
"openai>=1.0.0",
|
|
15
|
+
"httpx>=0.27",
|
|
16
|
+
"jsonschema>=4.0",
|
|
17
|
+
"editdistance>=0.6",
|
|
18
|
+
"rouge-score>=0.1",
|
|
19
|
+
"jsonpath-ng>=1.6",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.optional-dependencies]
|
|
23
|
+
dev = [
|
|
24
|
+
"pytest-asyncio>=0.23",
|
|
25
|
+
"httpx>=0.27",
|
|
26
|
+
"respx>=0.22",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
where = ["src"]
|
|
31
|
+
include = ["runsight_core*"]
|
|
32
|
+
|
|
33
|
+
[tool.pytest.ini_options]
|
|
34
|
+
asyncio_mode = "auto"
|
|
35
|
+
|
|
36
|
+
[tool.mypy]
|
|
37
|
+
python_version = "3.11"
|
|
38
|
+
strict = true
|
|
39
|
+
warn_unused_configs = true
|
|
40
|
+
disallow_untyped_defs = true
|
|
41
|
+
disallow_any_unimported = true
|
|
42
|
+
no_implicit_optional = true
|
|
43
|
+
warn_redundant_casts = true
|
|
44
|
+
warn_unused_ignores = true
|
|
45
|
+
warn_no_return = true
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Runsight Agent OS Core Engine
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .blocks.base import BaseBlock
|
|
6
|
+
from .blocks.code import CodeBlock
|
|
7
|
+
from .blocks.dispatch import DispatchBlock
|
|
8
|
+
from .blocks.gate import GateBlock
|
|
9
|
+
from .blocks.linear import LinearBlock
|
|
10
|
+
from .blocks.loop import CarryContextConfig, LoopBlock, LoopBlockDef
|
|
11
|
+
from .blocks.registry import BlockFactory, BlockRegistry
|
|
12
|
+
from .blocks.synthesize import SynthesizeBlock
|
|
13
|
+
from .blocks.workflow_block import WorkflowBlock
|
|
14
|
+
from .primitives import Soul, Step, Task
|
|
15
|
+
from .runner import ExecutionResult, RunsightTeamRunner
|
|
16
|
+
from .state import BlockResult, WorkflowState
|
|
17
|
+
from .workflow import Workflow
|
|
18
|
+
from .yaml import parse_workflow_yaml
|
|
19
|
+
from .yaml.schema import RetryConfig
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"Soul",
|
|
23
|
+
"Task",
|
|
24
|
+
"Step",
|
|
25
|
+
"RunsightTeamRunner",
|
|
26
|
+
"ExecutionResult",
|
|
27
|
+
"BlockResult",
|
|
28
|
+
"WorkflowState",
|
|
29
|
+
"BaseBlock",
|
|
30
|
+
"LinearBlock",
|
|
31
|
+
"DispatchBlock",
|
|
32
|
+
"SynthesizeBlock",
|
|
33
|
+
"LoopBlock",
|
|
34
|
+
"GateBlock",
|
|
35
|
+
"WorkflowBlock",
|
|
36
|
+
"CodeBlock",
|
|
37
|
+
"BlockRegistry",
|
|
38
|
+
"BlockFactory",
|
|
39
|
+
"Workflow",
|
|
40
|
+
"parse_workflow_yaml",
|
|
41
|
+
"LoopBlockDef",
|
|
42
|
+
"RetryConfig",
|
|
43
|
+
"CarryContextConfig",
|
|
44
|
+
]
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ArtifactStore ABC and InMemoryArtifactStore for workflow artifact management.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ArtifactStore(ABC):
    """Interface implemented by every artifact storage backend.

    A store instance is bound to one run via ``run_id``. All storage
    operations are coroutines and must be provided by concrete subclasses.
    """

    def __init__(self, run_id: str) -> None:
        # Identifier of the run this store belongs to.
        self.run_id = run_id

    @abstractmethod
    async def write(
        self, key: str, content: str, *, metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Store ``content`` under ``key`` (with optional ``metadata``) and return a string."""

    @abstractmethod
    async def read(self, ref: str) -> str:
        """Return the stored content addressed by ``ref``."""

    @abstractmethod
    async def list_artifacts(self) -> List[Dict[str, Any]]:
        """Return one dictionary per stored artifact."""

    @abstractmethod
    async def cleanup(self) -> None:
        """Release whatever the store is holding."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class InMemoryArtifactStore(ArtifactStore):
    """Artifact store backed by plain dictionaries held in process memory.

    Every artifact is addressed by a ``mem://{run_id}/{key}`` reference.
    """

    def __init__(self, run_id: str) -> None:
        super().__init__(run_id)
        # Both mappings are keyed by the artifact key passed to ``write``.
        self._content: Dict[str, str] = {}
        self._metadata: Dict[str, Optional[Dict[str, Any]]] = {}

    async def write(
        self, key: str, content: str, *, metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Store (or overwrite) ``content`` and ``metadata`` for ``key``; return its ref."""
        self._content[key] = content
        self._metadata[key] = metadata
        reference = f"mem://{self.run_id}/{key}"
        return reference

    async def read(self, ref: str) -> str:
        """Return the content behind ``ref``.

        Raises:
            KeyError: ``ref`` does not start with this run's ``mem://`` prefix,
                or no artifact is stored under the key it names.
        """
        scheme = f"mem://{self.run_id}/"
        # Only references minted for this run can be resolved.
        key = ref[len(scheme) :] if ref.startswith(scheme) else None
        if key is None or key not in self._content:
            raise KeyError(ref)
        return self._content[key]

    async def list_artifacts(self) -> List[Dict[str, Any]]:
        """Return a ``key`` / ``ref`` / ``metadata`` dictionary for each stored artifact."""
        entries: List[Dict[str, Any]] = []
        for key in self._content:
            entries.append(
                {
                    "key": key,
                    "ref": f"mem://{self.run_id}/{key}",
                    "metadata": self._metadata[key],
                }
            )
        return entries

    async def cleanup(self) -> None:
        """Drop every stored artifact and its metadata."""
        self._content.clear()
        self._metadata.clear()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Assertion plugin interface for runsight_core."""
|
|
2
|
+
|
|
3
|
+
from runsight_core.assertions.base import (
|
|
4
|
+
Assertion,
|
|
5
|
+
AssertionContext,
|
|
6
|
+
GradingResult,
|
|
7
|
+
TokenUsage,
|
|
8
|
+
)
|
|
9
|
+
from runsight_core.assertions.registry import (
|
|
10
|
+
register_assertion,
|
|
11
|
+
run_assertion,
|
|
12
|
+
run_assertions,
|
|
13
|
+
)
|
|
14
|
+
from runsight_core.assertions.scoring import AssertionsResult
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"Assertion",
|
|
18
|
+
"AssertionContext",
|
|
19
|
+
"AssertionsResult",
|
|
20
|
+
"GradingResult",
|
|
21
|
+
"TokenUsage",
|
|
22
|
+
"register_assertion",
|
|
23
|
+
"run_assertion",
|
|
24
|
+
"run_assertions",
|
|
25
|
+
]
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Base models for the assertion plugin interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any, Protocol, runtime_checkable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class TokenUsage:
    """Token counters recorded for one assertion evaluation."""

    # Every counter starts at zero unless the caller supplies a value.
    prompt: int = 0
    completion: int = 0
    total: int = 0
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class GradingResult:
    """Outcome produced by evaluating a single assertion."""

    # Required verdict fields.
    passed: bool
    score: float
    reason: str

    # Optional details; container fields get a fresh empty object per instance.
    named_scores: dict[str, float] = field(default_factory=dict)
    tokens_used: TokenUsage | None = None
    component_results: list[GradingResult] = field(default_factory=list)
    assertion_type: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class AssertionContext:
    """Bundle of values handed to an assertion's ``evaluate`` call.

    Every field is required; none has a default.
    """

    # Output and prompt information.
    output: str
    prompt: str
    prompt_hash: str

    # Soul and block identifiers.
    soul_id: str
    soul_version: str
    block_id: str
    block_type: str

    # Measurements (the cost and latency assertions read cost_usd / latency_ms).
    cost_usd: float
    total_tokens: int
    latency_ms: float

    # Variables plus run and workflow identifiers.
    variables: dict[str, Any]
    run_id: str
    workflow_id: str
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@runtime_checkable
class Assertion(Protocol):
    """Structural interface an assertion plugin has to provide.

    A conforming object exposes a ``type`` string and an ``evaluate`` method.
    """

    type: str

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Grade ``output`` using ``context`` and return the grading result."""
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Deterministic assertion plugins — registers all 15 types on import."""
|
|
2
|
+
|
|
3
|
+
from runsight_core.assertions.deterministic.linguistic import (
|
|
4
|
+
BleuAssertion,
|
|
5
|
+
LevenshteinAssertion,
|
|
6
|
+
RougeNAssertion,
|
|
7
|
+
)
|
|
8
|
+
from runsight_core.assertions.deterministic.performance import (
|
|
9
|
+
CostAssertion,
|
|
10
|
+
LatencyAssertion,
|
|
11
|
+
)
|
|
12
|
+
from runsight_core.assertions.deterministic.string import (
|
|
13
|
+
ContainsAllAssertion,
|
|
14
|
+
ContainsAnyAssertion,
|
|
15
|
+
ContainsAssertion,
|
|
16
|
+
EqualsAssertion,
|
|
17
|
+
IContainsAssertion,
|
|
18
|
+
RegexAssertion,
|
|
19
|
+
StartsWithAssertion,
|
|
20
|
+
WordCountAssertion,
|
|
21
|
+
)
|
|
22
|
+
from runsight_core.assertions.deterministic.structural import (
|
|
23
|
+
ContainsJsonAssertion,
|
|
24
|
+
IsJsonAssertion,
|
|
25
|
+
)
|
|
26
|
+
from runsight_core.assertions.registry import register_assertion
|
|
27
|
+
|
|
28
|
+
# Every deterministic assertion class imported above, in registration order.
_ALL_ASSERTIONS: list[type] = [
    # string
    EqualsAssertion,
    ContainsAssertion,
    IContainsAssertion,
    ContainsAllAssertion,
    ContainsAnyAssertion,
    StartsWithAssertion,
    RegexAssertion,
    WordCountAssertion,
    # structural
    IsJsonAssertion,
    ContainsJsonAssertion,
    # performance
    CostAssertion,
    LatencyAssertion,
    # linguistic
    LevenshteinAssertion,
    BleuAssertion,
    RougeNAssertion,
]

# Importing this package registers each class under its own ``type`` string.
for _cls in _ALL_ASSERTIONS:
    register_assertion(_cls.type, _cls)
|
|
48
|
+
|
|
49
|
+
__all__ = [
|
|
50
|
+
"BleuAssertion",
|
|
51
|
+
"ContainsAllAssertion",
|
|
52
|
+
"ContainsAnyAssertion",
|
|
53
|
+
"ContainsAssertion",
|
|
54
|
+
"ContainsJsonAssertion",
|
|
55
|
+
"CostAssertion",
|
|
56
|
+
"EqualsAssertion",
|
|
57
|
+
"IContainsAssertion",
|
|
58
|
+
"IsJsonAssertion",
|
|
59
|
+
"LatencyAssertion",
|
|
60
|
+
"LevenshteinAssertion",
|
|
61
|
+
"RegexAssertion",
|
|
62
|
+
"RougeNAssertion",
|
|
63
|
+
"StartsWithAssertion",
|
|
64
|
+
"WordCountAssertion",
|
|
65
|
+
]
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Deterministic linguistic assertion plugins.
|
|
2
|
+
|
|
3
|
+
Covers: levenshtein, bleu, rouge-n.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import math
|
|
9
|
+
from collections import Counter
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import editdistance
|
|
13
|
+
from rouge_score import rouge_scorer
|
|
14
|
+
|
|
15
|
+
from runsight_core.assertions.base import AssertionContext, GradingResult
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LevenshteinAssertion:
    """Pass when the edit distance between output and ``value`` is at most ``threshold``."""

    type = "levenshtein"

    def __init__(self, value: Any = "", threshold: float | None = None) -> None:
        # The expected value is always compared as text.
        self.value = str(value)
        # An omitted threshold means "at most 5 edits"; an explicit 0 is kept.
        self.threshold = 5 if threshold is None else threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Return a pass/fail result with score 1.0 or 0.0; ``context`` is not used."""
        distance = editdistance.eval(output, self.value)
        within_limit = distance <= self.threshold
        return GradingResult(
            passed=within_limit,
            score=1.0 if within_limit else 0.0,
            reason=f"Levenshtein distance is {distance} (threshold {self.threshold})",
        )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BleuAssertion:
    """Pass when the BLEU score of the output against ``value`` reaches ``threshold``.

    Scoring is delegated to the module's inline BLEU implementation (no nltk).
    """

    type = "bleu"

    def __init__(self, value: Any = "", threshold: float | None = None) -> None:
        # The reference text is always handled as a string.
        self.value = str(value)
        # An omitted threshold defaults to 0.5; an explicit 0 is kept.
        self.threshold = 0.5 if threshold is None else threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Score ``output`` against the reference; ``context`` is not used."""
        score = _compute_bleu(reference=self.value, candidate=output)
        passed = score >= self.threshold
        # The reason shows the comparison that actually held.
        comparison = ">=" if passed else "<"
        reason = f"BLEU score {score:.4f} {comparison} threshold {self.threshold}"
        return GradingResult(passed=passed, score=score, reason=reason)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class RougeNAssertion:
    """Pass when the ROUGE score of the output against ``value`` reaches ``threshold``.

    The score is the ``rouge1`` F-measure from the rouge-score library,
    computed without stemming.
    """

    type = "rouge-n"

    def __init__(self, value: Any = "", threshold: float | None = None) -> None:
        # The reference text is always handled as a string.
        self.value = str(value)
        # An omitted threshold defaults to 0.75; an explicit 0 is kept.
        self.threshold = 0.75 if threshold is None else threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Score ``output`` against the reference; ``context`` is not used."""
        if output and self.value:
            scorer = rouge_scorer.RougeScorer(["rouge1"], use_stemmer=False)
            score = scorer.score(self.value, output)["rouge1"].fmeasure
        else:
            # An empty output or empty reference scores zero without calling the scorer.
            score = 0.0

        passed = score >= self.threshold
        comparison = ">=" if passed else "<"
        reason = f"ROUGE-N score {score:.4f} {comparison} threshold {self.threshold}"
        return GradingResult(passed=passed, score=score, reason=reason)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ── Inline BLEU-4 implementation ────────────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _get_ngrams(tokens: list[str], n: int) -> Counter[tuple[str, ...]]:
|
|
82
|
+
"""Extract n-grams from a token list."""
|
|
83
|
+
return Counter(tuple(tokens[i : i + n]) for i in range(len(tokens) - n + 1))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _compute_bleu(reference: str, candidate: str, max_n: int = 4) -> float:
    """Compute a smoothed BLEU score for ``candidate`` against ``reference``.

    Both texts are lower-cased and split on whitespace. Unigram precision is
    used as-is; for every higher order, 1 is added to both the clipped match
    count and the candidate n-gram count. The result is the brevity penalty
    times the exponential of the mean log precision over ``max_n`` orders.

    Returns 0.0 when either text has no tokens, or when no candidate unigram
    appears in the reference.

    Ported from promptfoo's BLEU implementation.
    """
    reference_tokens = reference.lower().split()
    candidate_tokens = candidate.lower().split()

    # Nothing to compare when either side is empty.
    if not (candidate_tokens and reference_tokens):
        return 0.0

    # The brevity penalty applies only to candidates shorter than the reference.
    reference_len = len(reference_tokens)
    candidate_len = len(candidate_tokens)
    if candidate_len < reference_len:
        brevity_penalty = math.exp(1.0 - reference_len / candidate_len)
    else:
        brevity_penalty = 1.0

    log_precision_mean = 0.0
    for order in range(1, max_n + 1):
        reference_counts = _get_ngrams(reference_tokens, order)
        candidate_counts = _get_ngrams(candidate_tokens, order)

        # Counter intersection keeps min(candidate, reference) per n-gram,
        # which is the clipped match count.
        matched = sum((candidate_counts & reference_counts).values())
        proposed = sum(candidate_counts.values())

        if order > 1:
            # Smoothed, so an order with no matches never yields log(0).
            precision = (matched + 1) / (proposed + 1)
        elif matched == 0:
            # No shared unigram at all: the score is zero.
            return 0.0
        else:
            precision = matched / proposed

        log_precision_mean += math.log(precision) / max_n

    return brevity_penalty * math.exp(log_precision_mean)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Deterministic performance assertion plugins.
|
|
2
|
+
|
|
3
|
+
Covers: cost, latency.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from runsight_core.assertions.base import AssertionContext, GradingResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CostAssertion:
    """Pass when ``context.cost_usd`` does not exceed ``threshold``."""

    type = "cost"

    def __init__(self, value: Any = None, threshold: float | None = None) -> None:
        # ``value`` is stored but not read by ``evaluate``.
        self.value = value
        self.threshold = threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Compare the context's cost with the threshold; ``output`` is not used.

        A missing threshold is treated as 0.0.
        """
        cost = context.cost_usd
        limit = 0.0 if self.threshold is None else self.threshold
        within_limit = cost <= limit
        if within_limit:
            reason = f"Cost ${cost:.4f} is within threshold ${limit:.4f}"
        else:
            reason = f"Cost ${cost:.4f} exceeds threshold ${limit:.4f}"
        return GradingResult(
            passed=within_limit,
            score=1.0 if within_limit else 0.0,
            reason=reason,
        )
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class LatencyAssertion:
    """Pass when ``context.latency_ms`` does not exceed ``threshold``."""

    type = "latency"

    def __init__(self, value: Any = None, threshold: float | None = None) -> None:
        # ``value`` is stored but not read by ``evaluate``.
        self.value = value
        self.threshold = threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Compare the context's latency with the threshold; ``output`` is not used.

        A missing threshold is treated as 0.0.
        """
        latency = context.latency_ms
        limit = 0.0 if self.threshold is None else self.threshold
        within_limit = latency <= limit
        if within_limit:
            reason = f"Latency {latency:.1f}ms is within threshold {limit:.1f}ms"
        else:
            reason = f"Latency {latency:.1f}ms exceeds threshold {limit:.1f}ms"
        return GradingResult(
            passed=within_limit,
            score=1.0 if within_limit else 0.0,
            reason=reason,
        )
|