runsight-core 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. runsight_core-0.1.2/PKG-INFO +18 -0
  2. runsight_core-0.1.2/README.md +6 -0
  3. runsight_core-0.1.2/pyproject.toml +45 -0
  4. runsight_core-0.1.2/setup.cfg +4 -0
  5. runsight_core-0.1.2/src/runsight_core/__init__.py +44 -0
  6. runsight_core-0.1.2/src/runsight_core/artifacts.py +66 -0
  7. runsight_core-0.1.2/src/runsight_core/assertions/__init__.py +25 -0
  8. runsight_core-0.1.2/src/runsight_core/assertions/base.py +57 -0
  9. runsight_core-0.1.2/src/runsight_core/assertions/deterministic/__init__.py +65 -0
  10. runsight_core-0.1.2/src/runsight_core/assertions/deterministic/linguistic.py +129 -0
  11. runsight_core-0.1.2/src/runsight_core/assertions/deterministic/performance.py +58 -0
  12. runsight_core-0.1.2/src/runsight_core/assertions/deterministic/string.py +212 -0
  13. runsight_core-0.1.2/src/runsight_core/assertions/deterministic/structural.py +91 -0
  14. runsight_core-0.1.2/src/runsight_core/assertions/registry.py +159 -0
  15. runsight_core-0.1.2/src/runsight_core/assertions/scoring.py +42 -0
  16. runsight_core-0.1.2/src/runsight_core/blocks/__init__.py +28 -0
  17. runsight_core-0.1.2/src/runsight_core/blocks/_helpers.py +45 -0
  18. runsight_core-0.1.2/src/runsight_core/blocks/_registry.py +47 -0
  19. runsight_core-0.1.2/src/runsight_core/blocks/base.py +120 -0
  20. runsight_core-0.1.2/src/runsight_core/blocks/code.py +349 -0
  21. runsight_core-0.1.2/src/runsight_core/blocks/dispatch.py +238 -0
  22. runsight_core-0.1.2/src/runsight_core/blocks/gate.py +194 -0
  23. runsight_core-0.1.2/src/runsight_core/blocks/linear.py +122 -0
  24. runsight_core-0.1.2/src/runsight_core/blocks/loop.py +316 -0
  25. runsight_core-0.1.2/src/runsight_core/blocks/registry.py +48 -0
  26. runsight_core-0.1.2/src/runsight_core/blocks/synthesize.py +134 -0
  27. runsight_core-0.1.2/src/runsight_core/blocks/workflow_block.py +418 -0
  28. runsight_core-0.1.2/src/runsight_core/budget_enforcement.py +193 -0
  29. runsight_core-0.1.2/src/runsight_core/conditions/__init__.py +0 -0
  30. runsight_core-0.1.2/src/runsight_core/conditions/engine.py +315 -0
  31. runsight_core-0.1.2/src/runsight_core/eval/__init__.py +0 -0
  32. runsight_core-0.1.2/src/runsight_core/eval/runner.py +149 -0
  33. runsight_core-0.1.2/src/runsight_core/isolation/__init__.py +28 -0
  34. runsight_core-0.1.2/src/runsight_core/isolation/credentials.py +42 -0
  35. runsight_core-0.1.2/src/runsight_core/isolation/envelope.py +88 -0
  36. runsight_core-0.1.2/src/runsight_core/isolation/errors.py +13 -0
  37. runsight_core-0.1.2/src/runsight_core/isolation/handlers.py +141 -0
  38. runsight_core-0.1.2/src/runsight_core/isolation/harness.py +440 -0
  39. runsight_core-0.1.2/src/runsight_core/isolation/ipc.py +152 -0
  40. runsight_core-0.1.2/src/runsight_core/isolation/pool.py +21 -0
  41. runsight_core-0.1.2/src/runsight_core/isolation/worker.py +384 -0
  42. runsight_core-0.1.2/src/runsight_core/isolation/wrapper.py +227 -0
  43. runsight_core-0.1.2/src/runsight_core/llm/__init__.py +7 -0
  44. runsight_core-0.1.2/src/runsight_core/llm/client.py +125 -0
  45. runsight_core-0.1.2/src/runsight_core/llm/model_catalog.py +137 -0
  46. runsight_core-0.1.2/src/runsight_core/memory/__init__.py +7 -0
  47. runsight_core-0.1.2/src/runsight_core/memory/budget.py +389 -0
  48. runsight_core-0.1.2/src/runsight_core/memory/token_counting.py +13 -0
  49. runsight_core-0.1.2/src/runsight_core/memory/windowing.py +43 -0
  50. runsight_core-0.1.2/src/runsight_core/observer.py +539 -0
  51. runsight_core-0.1.2/src/runsight_core/primitives.py +197 -0
  52. runsight_core-0.1.2/src/runsight_core/py.typed +0 -0
  53. runsight_core-0.1.2/src/runsight_core/runner.py +408 -0
  54. runsight_core-0.1.2/src/runsight_core/security.py +71 -0
  55. runsight_core-0.1.2/src/runsight_core/state.py +78 -0
  56. runsight_core-0.1.2/src/runsight_core/tools/__init__.py +17 -0
  57. runsight_core-0.1.2/src/runsight_core/tools/_catalog.py +483 -0
  58. runsight_core-0.1.2/src/runsight_core/tools/delegate.py +53 -0
  59. runsight_core-0.1.2/src/runsight_core/tools/file_io.py +68 -0
  60. runsight_core-0.1.2/src/runsight_core/tools/http.py +68 -0
  61. runsight_core-0.1.2/src/runsight_core/workflow.py +867 -0
  62. runsight_core-0.1.2/src/runsight_core/yaml/__init__.py +20 -0
  63. runsight_core-0.1.2/src/runsight_core/yaml/discovery.py +450 -0
  64. runsight_core-0.1.2/src/runsight_core/yaml/parser.py +1032 -0
  65. runsight_core-0.1.2/src/runsight_core/yaml/registry.py +131 -0
  66. runsight_core-0.1.2/src/runsight_core/yaml/schema.py +526 -0
  67. runsight_core-0.1.2/src/runsight_core.egg-info/PKG-INFO +18 -0
  68. runsight_core-0.1.2/src/runsight_core.egg-info/SOURCES.txt +219 -0
  69. runsight_core-0.1.2/src/runsight_core.egg-info/dependency_links.txt +1 -0
  70. runsight_core-0.1.2/src/runsight_core.egg-info/requires.txt +13 -0
  71. runsight_core-0.1.2/src/runsight_core.egg-info/top_level.txt +1 -0
  72. runsight_core-0.1.2/tests/test_achat_budget_enforcement.py +419 -0
  73. runsight_core-0.1.2/tests/test_artifact_store_wiring.py +419 -0
  74. runsight_core-0.1.2/tests/test_artifacts.py +403 -0
  75. runsight_core-0.1.2/tests/test_base_block.py +149 -0
  76. runsight_core-0.1.2/tests/test_baseblock_artifact_helpers.py +351 -0
  77. runsight_core-0.1.2/tests/test_block_timeout_enforcement.py +194 -0
  78. runsight_core-0.1.2/tests/test_blocks.py +234 -0
  79. runsight_core-0.1.2/tests/test_budget_enforcement_types.py +541 -0
  80. runsight_core-0.1.2/tests/test_budget_limits_schema.py +594 -0
  81. runsight_core-0.1.2/tests/test_budget_migration_remaining.py +378 -0
  82. runsight_core-0.1.2/tests/test_budget_models.py +423 -0
  83. runsight_core-0.1.2/tests/test_budget_session.py +633 -0
  84. runsight_core-0.1.2/tests/test_budget_wiring.py +528 -0
  85. runsight_core-0.1.2/tests/test_carry_context_blockresult.py +554 -0
  86. runsight_core-0.1.2/tests/test_code_block.py +273 -0
  87. runsight_core-0.1.2/tests/test_codeblock_sandbox_hardening.py +215 -0
  88. runsight_core-0.1.2/tests/test_composite_observer_isolation.py +176 -0
  89. runsight_core-0.1.2/tests/test_condition_engine.py +604 -0
  90. runsight_core-0.1.2/tests/test_context_truncation.py +359 -0
  91. runsight_core-0.1.2/tests/test_conversation_histories.py +173 -0
  92. runsight_core-0.1.2/tests/test_cross_feature_integration.py +650 -0
  93. runsight_core-0.1.2/tests/test_custom_asset_tool_contract.py +59 -0
  94. runsight_core-0.1.2/tests/test_discovery.py +1010 -0
  95. runsight_core-0.1.2/tests/test_dispatch_block_stateful.py +1009 -0
  96. runsight_core-0.1.2/tests/test_dispatch_budget_isolation.py +639 -0
  97. runsight_core-0.1.2/tests/test_dispatch_exit_def.py +123 -0
  98. runsight_core-0.1.2/tests/test_dispatch_synthesize_integration.py +829 -0
  99. runsight_core-0.1.2/tests/test_dispatch_v2.py +1072 -0
  100. runsight_core-0.1.2/tests/test_e2e_block_timeout.py +489 -0
  101. runsight_core-0.1.2/tests/test_e2e_cost_cap.py +438 -0
  102. runsight_core-0.1.2/tests/test_e2e_dispatch_budget.py +484 -0
  103. runsight_core-0.1.2/tests/test_e2e_warn_and_flow_timeout.py +879 -0
  104. runsight_core-0.1.2/tests/test_fit_to_budget_phase1.py +449 -0
  105. runsight_core-0.1.2/tests/test_fit_to_budget_phase2.py +620 -0
  106. runsight_core-0.1.2/tests/test_gate_error_subclass.py +8 -0
  107. runsight_core-0.1.2/tests/test_gate_file_writer_blocks.py +168 -0
  108. runsight_core-0.1.2/tests/test_integration_blocks_workflow.py +395 -0
  109. runsight_core-0.1.2/tests/test_integration_merge_validation.py +86 -0
  110. runsight_core-0.1.2/tests/test_integration_runner_primitives.py +290 -0
  111. runsight_core-0.1.2/tests/test_integration_state_blocks.py +290 -0
  112. runsight_core-0.1.2/tests/test_integration_workflow.py +422 -0
  113. runsight_core-0.1.2/tests/test_integration_workflow_block.py +492 -0
  114. runsight_core-0.1.2/tests/test_integration_workflow_block_backward_compat.py +527 -0
  115. runsight_core-0.1.2/tests/test_integration_workflow_block_e2e.py +869 -0
  116. runsight_core-0.1.2/tests/test_integration_workflow_block_parser.py +548 -0
  117. runsight_core-0.1.2/tests/test_integration_workflow_block_with_other_blocks.py +413 -0
  118. runsight_core-0.1.2/tests/test_iso_001_envelope_models.py +560 -0
  119. runsight_core-0.1.2/tests/test_iso_002_ipc_protocol.py +953 -0
  120. runsight_core-0.1.2/tests/test_iso_003_harness.py +952 -0
  121. runsight_core-0.1.2/tests/test_iso_004_worker.py +731 -0
  122. runsight_core-0.1.2/tests/test_iso_005_block_migration.py +1143 -0
  123. runsight_core-0.1.2/tests/test_iso_006_dispatch_delegate.py +512 -0
  124. runsight_core-0.1.2/tests/test_iso_007_monitoring.py +673 -0
  125. runsight_core-0.1.2/tests/test_iso_008_credentials.py +764 -0
  126. runsight_core-0.1.2/tests/test_linearblock_stateful.py +605 -0
  127. runsight_core-0.1.2/tests/test_loop_block.py +830 -0
  128. runsight_core-0.1.2/tests/test_loop_break_conditions.py +873 -0
  129. runsight_core-0.1.2/tests/test_loop_carry_context.py +1391 -0
  130. runsight_core-0.1.2/tests/test_loop_exports_schema.py +490 -0
  131. runsight_core-0.1.2/tests/test_loop_workflow_validation.py +193 -0
  132. runsight_core-0.1.2/tests/test_loopblock_kwargs_forwarding.py +552 -0
  133. runsight_core-0.1.2/tests/test_loopblock_stateful_integration.py +855 -0
  134. runsight_core-0.1.2/tests/test_model_catalog.py +519 -0
  135. runsight_core-0.1.2/tests/test_observer.py +135 -0
  136. runsight_core-0.1.2/tests/test_observer_soul_extension.py +202 -0
  137. runsight_core-0.1.2/tests/test_parser_inputs_outputs.py +884 -0
  138. runsight_core-0.1.2/tests/test_parser_workflow_block.py +432 -0
  139. runsight_core-0.1.2/tests/test_primitives.py +24 -0
  140. runsight_core-0.1.2/tests/test_primitives_extended.py +220 -0
  141. runsight_core-0.1.2/tests/test_prompt_hash.py +151 -0
  142. runsight_core-0.1.2/tests/test_registry.py +128 -0
  143. runsight_core-0.1.2/tests/test_remove_placeholder_block.py +170 -0
  144. runsight_core-0.1.2/tests/test_retry_config.py +554 -0
  145. runsight_core-0.1.2/tests/test_retry_execution.py +692 -0
  146. runsight_core-0.1.2/tests/test_retry_stateful.py +611 -0
  147. runsight_core-0.1.2/tests/test_retryblock_migration.py +328 -0
  148. runsight_core-0.1.2/tests/test_run126_code_block_parser_and_achat.py +307 -0
  149. runsight_core-0.1.2/tests/test_run127_runner_get_client_api_key.py +58 -0
  150. runsight_core-0.1.2/tests/test_run137_async_subprocess.py +323 -0
  151. runsight_core-0.1.2/tests/test_run141_multi_provider_keys.py +185 -0
  152. runsight_core-0.1.2/tests/test_run170_complex_read_sites.py +279 -0
  153. runsight_core-0.1.2/tests/test_run177_block_result.py +207 -0
  154. runsight_core-0.1.2/tests/test_run178_write_sites_block_result.py +490 -0
  155. runsight_core-0.1.2/tests/test_run179_strict_block_result.py +158 -0
  156. runsight_core-0.1.2/tests/test_run181_read_site_migration.py +310 -0
  157. runsight_core-0.1.2/tests/test_run219_auto_registration.py +575 -0
  158. runsight_core-0.1.2/tests/test_run222_migrate_blocks.py +359 -0
  159. runsight_core-0.1.2/tests/test_run377_yaml_enabled.py +101 -0
  160. runsight_core-0.1.2/tests/test_run415_no_builtin_souls.py +104 -0
  161. runsight_core-0.1.2/tests/test_run468_parser_soul_field_forwarding.py +107 -0
  162. runsight_core-0.1.2/tests/test_run469_discover_soul_fields.py +117 -0
  163. runsight_core-0.1.2/tests/test_run569_project_root_resolution.py +248 -0
  164. runsight_core-0.1.2/tests/test_run570_kill_inline_souls.py +278 -0
  165. runsight_core-0.1.2/tests/test_run571_wire_soul_ref_to_library.py +654 -0
  166. runsight_core-0.1.2/tests/test_run572_library_soul_tool_governance.py +643 -0
  167. runsight_core-0.1.2/tests/test_run603_workflow_interface_schema.py +96 -0
  168. runsight_core-0.1.2/tests/test_run604_interface_execution.py +422 -0
  169. runsight_core-0.1.2/tests/test_run605_on_error_modes.py +349 -0
  170. runsight_core-0.1.2/tests/test_run606_runtime_depth_parity.py +330 -0
  171. runsight_core-0.1.2/tests/test_run614_integration_subworkflow.py +804 -0
  172. runsight_core-0.1.2/tests/test_run628_noise_cleanup_verification.py +410 -0
  173. runsight_core-0.1.2/tests/test_run629_dispatch_e2e.py +535 -0
  174. runsight_core-0.1.2/tests/test_run644_dispatch_runtime_rename.py +96 -0
  175. runsight_core-0.1.2/tests/test_run645_dispatch_schema_canonicalization.py +81 -0
  176. runsight_core-0.1.2/tests/test_run663_child_observer_wrapper.py +315 -0
  177. runsight_core-0.1.2/tests/test_run663_parser_round_trip.py +102 -0
  178. runsight_core-0.1.2/tests/test_run668_depends_error_routes.py +399 -0
  179. runsight_core-0.1.2/tests/test_run669_gate_shortcuts.py +222 -0
  180. runsight_core-0.1.2/tests/test_run670_error_route_runtime.py +261 -0
  181. runsight_core-0.1.2/tests/test_run671_routes_shorthand.py +573 -0
  182. runsight_core-0.1.2/tests/test_run675_block_execution_context.py +68 -0
  183. runsight_core-0.1.2/tests/test_run676_execute_block_extraction.py +234 -0
  184. runsight_core-0.1.2/tests/test_run677_workflow_run_execute_block_wiring.py +334 -0
  185. runsight_core-0.1.2/tests/test_run678_loop_execute_block_wiring.py +253 -0
  186. runsight_core-0.1.2/tests/test_run680_codeblock_exit_handle.py +275 -0
  187. runsight_core-0.1.2/tests/test_run681_linearblock_exit_conditions.py +418 -0
  188. runsight_core-0.1.2/tests/test_run682_workflowblock_loopblock_e2e.py +603 -0
  189. runsight_core-0.1.2/tests/test_run683_nested_loopblock_observer.py +521 -0
  190. runsight_core-0.1.2/tests/test_run684_exit_handle_all_block_types.py +730 -0
  191. runsight_core-0.1.2/tests/test_run685_eval_debt_integration.py +573 -0
  192. runsight_core-0.1.2/tests/test_run688_soul_assertions_cleanup.py +122 -0
  193. runsight_core-0.1.2/tests/test_run690_delete_duplicate_resolve_soul.py +93 -0
  194. runsight_core-0.1.2/tests/test_run692_inline_soul_fixture_migration.py +230 -0
  195. runsight_core-0.1.2/tests/test_run693_step_wrapper_assertions.py +274 -0
  196. runsight_core-0.1.2/tests/test_run694_eval_yaml_schema.py +434 -0
  197. runsight_core-0.1.2/tests/test_run695_eval_runner.py +756 -0
  198. runsight_core-0.1.2/tests/test_run699_eval_integration.py +829 -0
  199. runsight_core-0.1.2/tests/test_run700_eval_e2e.py +854 -0
  200. runsight_core-0.1.2/tests/test_run701_state_isolation_verification.py +172 -0
  201. runsight_core-0.1.2/tests/test_run702_mixed_pipeline_e2e.py +493 -0
  202. runsight_core-0.1.2/tests/test_run703_dispatch_in_loop_e2e.py +790 -0
  203. runsight_core-0.1.2/tests/test_run704_error_route_output_mapping_e2e.py +720 -0
  204. runsight_core-0.1.2/tests/test_runner.py +247 -0
  205. runsight_core-0.1.2/tests/test_runner_messages.py +112 -0
  206. runsight_core-0.1.2/tests/test_sandbox_hardening.py +541 -0
  207. runsight_core-0.1.2/tests/test_schema.py +145 -0
  208. runsight_core-0.1.2/tests/test_schema_validation.py +463 -0
  209. runsight_core-0.1.2/tests/test_state.py +103 -0
  210. runsight_core-0.1.2/tests/test_tool_integration.py +2225 -0
  211. runsight_core-0.1.2/tests/test_tool_registry.py +413 -0
  212. runsight_core-0.1.2/tests/test_windowing.py +248 -0
  213. runsight_core-0.1.2/tests/test_workflow.py +818 -0
  214. runsight_core-0.1.2/tests/test_workflow_block_execute.py +335 -0
  215. runsight_core-0.1.2/tests/test_workflow_block_recursion.py +141 -0
  216. runsight_core-0.1.2/tests/test_workflow_defensive_observer.py +237 -0
  217. runsight_core-0.1.2/tests/test_workflow_output_conditions.py +819 -0
  218. runsight_core-0.1.2/tests/test_yaml_assertions_config.py +191 -0
  219. runsight_core-0.1.2/tests/test_yaml_dx_e2e.py +449 -0
  220. runsight_core-0.1.2/tests/test_yaml_dx_sugar.py +868 -0
  221. runsight_core-0.1.2/tests/test_yaml_parser.py +834 -0
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: runsight-core
3
+ Version: 0.1.2
4
+ Summary: Runsight Agent OS Core Engine
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: pydantic>=2.0
8
+ Requires-Dist: litellm>=1.0.0
9
+ Requires-Dist: openai>=1.0.0
10
+ Requires-Dist: httpx>=0.27
11
+ Requires-Dist: jsonschema>=4.0
12
+ Requires-Dist: editdistance>=0.6
13
+ Requires-Dist: rouge-score>=0.1
14
+ Requires-Dist: jsonpath-ng>=1.6
15
+ Provides-Extra: dev
16
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
17
+ Requires-Dist: httpx>=0.27; extra == "dev"
18
+ Requires-Dist: respx>=0.22; extra == "dev"
@@ -0,0 +1,6 @@
1
+ # packages/core
2
+
3
+ Canonical home for the reusable Runsight runtime engine.
4
+
5
+ This package owns the Python runtime engine, its schema snapshot, and the core
6
+ test suite.
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools>=75.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "runsight-core"
7
+ version = "0.1.2"
8
+ description = "Runsight Agent OS Core Engine"
9
+ requires-python = ">=3.11"
10
+ license = "Apache-2.0"
11
+ dependencies = [
12
+ "pydantic>=2.0",
13
+ "litellm>=1.0.0",
14
+ "openai>=1.0.0",
15
+ "httpx>=0.27",
16
+ "jsonschema>=4.0",
17
+ "editdistance>=0.6",
18
+ "rouge-score>=0.1",
19
+ "jsonpath-ng>=1.6",
20
+ ]
21
+
22
+ [project.optional-dependencies]
23
+ dev = [
24
+ "pytest-asyncio>=0.23",
25
+ "httpx>=0.27",
26
+ "respx>=0.22",
27
+ ]
28
+
29
+ [tool.setuptools.packages.find]
30
+ where = ["src"]
31
+ include = ["runsight_core*"]
32
+
33
+ [tool.pytest.ini_options]
34
+ asyncio_mode = "auto"
35
+
36
+ [tool.mypy]
37
+ python_version = "3.11"
38
+ strict = true
39
+ warn_unused_configs = true
40
+ disallow_untyped_defs = true
41
+ disallow_any_unimported = true
42
+ no_implicit_optional = true
43
+ warn_redundant_casts = true
44
+ warn_unused_ignores = true
45
+ warn_no_return = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,44 @@
1
+ """
2
+ Runsight Agent OS Core Engine
3
+ """
4
+
5
+ from .blocks.base import BaseBlock
6
+ from .blocks.code import CodeBlock
7
+ from .blocks.dispatch import DispatchBlock
8
+ from .blocks.gate import GateBlock
9
+ from .blocks.linear import LinearBlock
10
+ from .blocks.loop import CarryContextConfig, LoopBlock, LoopBlockDef
11
+ from .blocks.registry import BlockFactory, BlockRegistry
12
+ from .blocks.synthesize import SynthesizeBlock
13
+ from .blocks.workflow_block import WorkflowBlock
14
+ from .primitives import Soul, Step, Task
15
+ from .runner import ExecutionResult, RunsightTeamRunner
16
+ from .state import BlockResult, WorkflowState
17
+ from .workflow import Workflow
18
+ from .yaml import parse_workflow_yaml
19
+ from .yaml.schema import RetryConfig
20
+
21
+ __all__ = [
22
+ "Soul",
23
+ "Task",
24
+ "Step",
25
+ "RunsightTeamRunner",
26
+ "ExecutionResult",
27
+ "BlockResult",
28
+ "WorkflowState",
29
+ "BaseBlock",
30
+ "LinearBlock",
31
+ "DispatchBlock",
32
+ "SynthesizeBlock",
33
+ "LoopBlock",
34
+ "GateBlock",
35
+ "WorkflowBlock",
36
+ "CodeBlock",
37
+ "BlockRegistry",
38
+ "BlockFactory",
39
+ "Workflow",
40
+ "parse_workflow_yaml",
41
+ "LoopBlockDef",
42
+ "RetryConfig",
43
+ "CarryContextConfig",
44
+ ]
@@ -0,0 +1,66 @@
1
+ """
2
+ ArtifactStore ABC and InMemoryArtifactStore for workflow artifact management.
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any, Dict, List, Optional
7
+
8
+
9
class ArtifactStore(ABC):
    """Interface that every artifact storage backend implements.

    A store instance is bound to a single run through ``run_id``; all four
    storage operations are coroutines that concrete backends must provide.
    """

    def __init__(self, run_id: str) -> None:
        # Remember which run this store belongs to.
        self.run_id = run_id

    @abstractmethod
    async def write(
        self,
        key: str,
        content: str,
        *,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Persist ``content`` under ``key`` and return a string reference."""
        ...

    @abstractmethod
    async def read(self, ref: str) -> str:
        """Return the content addressed by ``ref``."""
        ...

    @abstractmethod
    async def list_artifacts(self) -> List[Dict[str, Any]]:
        """Return one descriptor dict per stored artifact."""
        ...

    @abstractmethod
    async def cleanup(self) -> None:
        """Release whatever the backend holds for this store."""
        ...
28
+
29
+
30
class InMemoryArtifactStore(ArtifactStore):
    """Artifact store that keeps everything in process memory.

    References handed out by :meth:`write` have the form
    ``mem://{run_id}/{key}``.
    """

    def __init__(self, run_id: str) -> None:
        super().__init__(run_id)
        # Two parallel maps, both keyed by the artifact key.
        self._content: Dict[str, str] = {}
        self._metadata: Dict[str, Optional[Dict[str, Any]]] = {}

    async def write(
        self,
        key: str,
        content: str,
        *,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Store (or overwrite) ``content`` and ``metadata`` under ``key``.

        Returns the ``mem://`` reference for the stored artifact.
        """
        self._content[key] = content
        self._metadata[key] = metadata
        return f"mem://{self.run_id}/{key}"

    async def read(self, ref: str) -> str:
        """Return the content behind ``ref``.

        Raises:
            KeyError: if ``ref`` does not start with this run's prefix or
                names a key that was never written. The error carries
                ``ref`` itself.
        """
        scheme = f"mem://{self.run_id}/"
        # Only references minted for this run can be resolved.
        key = ref[len(scheme) :] if ref.startswith(scheme) else None
        if key is None or key not in self._content:
            raise KeyError(ref)
        return self._content[key]

    async def list_artifacts(self) -> List[Dict[str, Any]]:
        """Describe every stored artifact as ``{"key", "ref", "metadata"}``."""
        listing: List[Dict[str, Any]] = []
        for key in self._content:
            listing.append(
                {
                    "key": key,
                    "ref": f"mem://{self.run_id}/{key}",
                    "metadata": self._metadata[key],
                }
            )
        return listing

    async def cleanup(self) -> None:
        """Drop all stored content and metadata."""
        self._content.clear()
        self._metadata.clear()
@@ -0,0 +1,25 @@
1
+ """Assertion plugin interface for runsight_core."""
2
+
3
+ from runsight_core.assertions.base import (
4
+ Assertion,
5
+ AssertionContext,
6
+ GradingResult,
7
+ TokenUsage,
8
+ )
9
+ from runsight_core.assertions.registry import (
10
+ register_assertion,
11
+ run_assertion,
12
+ run_assertions,
13
+ )
14
+ from runsight_core.assertions.scoring import AssertionsResult
15
+
16
+ __all__ = [
17
+ "Assertion",
18
+ "AssertionContext",
19
+ "AssertionsResult",
20
+ "GradingResult",
21
+ "TokenUsage",
22
+ "register_assertion",
23
+ "run_assertion",
24
+ "run_assertions",
25
+ ]
@@ -0,0 +1,57 @@
1
+ """Base models for the assertion plugin interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Protocol, runtime_checkable
7
+
8
+
9
@dataclass
class TokenUsage:
    """Token counters attached to an assertion evaluation.

    All three counters default to zero; ``total`` is stored as given and is
    not derived from the other two.
    """

    prompt: int = 0
    completion: int = 0
    total: int = 0
16
+
17
+
18
@dataclass
class GradingResult:
    """Outcome produced by evaluating one assertion.

    ``passed``, ``score`` and ``reason`` are required; every other field has
    a default, and the container fields get a fresh empty container per
    instance.
    """

    # Required verdict fields.
    passed: bool
    score: float
    reason: str
    # Optional details.
    named_scores: dict[str, float] = field(default_factory=dict)
    tokens_used: TokenUsage | None = None
    component_results: list[GradingResult] = field(default_factory=list)
    assertion_type: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
30
+
31
+
32
@dataclass
class AssertionContext:
    """Bundle of values handed to an assertion's ``evaluate`` call.

    Every field is required; none has a default.
    """

    # Text under evaluation and the prompt fields.
    output: str
    prompt: str
    prompt_hash: str
    # Soul and block identifiers.
    soul_id: str
    soul_version: str
    block_id: str
    block_type: str
    # Performance figures (read by the cost and latency assertions).
    cost_usd: float
    total_tokens: int
    latency_ms: float
    # Free-form variables plus run/workflow identifiers.
    variables: dict[str, Any]
    run_id: str
    workflow_id: str
49
+
50
+
51
@runtime_checkable
class Assertion(Protocol):
    """Structural interface for assertion plugins.

    A plugin exposes a ``type`` attribute and an ``evaluate`` method; because
    the protocol is runtime-checkable, ``isinstance`` can test for both
    members.
    """

    type: str

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Grade ``output`` using ``context`` and return the result."""
        ...
@@ -0,0 +1,65 @@
1
+ """Deterministic assertion plugins — registers all 15 types on import."""
2
+
3
+ from runsight_core.assertions.deterministic.linguistic import (
4
+ BleuAssertion,
5
+ LevenshteinAssertion,
6
+ RougeNAssertion,
7
+ )
8
+ from runsight_core.assertions.deterministic.performance import (
9
+ CostAssertion,
10
+ LatencyAssertion,
11
+ )
12
+ from runsight_core.assertions.deterministic.string import (
13
+ ContainsAllAssertion,
14
+ ContainsAnyAssertion,
15
+ ContainsAssertion,
16
+ EqualsAssertion,
17
+ IContainsAssertion,
18
+ RegexAssertion,
19
+ StartsWithAssertion,
20
+ WordCountAssertion,
21
+ )
22
+ from runsight_core.assertions.deterministic.structural import (
23
+ ContainsJsonAssertion,
24
+ IsJsonAssertion,
25
+ )
26
+ from runsight_core.assertions.registry import register_assertion
27
+
28
# Every deterministic assertion class, in registration order.
_ALL_ASSERTIONS: list[type] = [
    # string
    EqualsAssertion,
    ContainsAssertion,
    IContainsAssertion,
    ContainsAllAssertion,
    ContainsAnyAssertion,
    StartsWithAssertion,
    RegexAssertion,
    WordCountAssertion,
    # structural
    IsJsonAssertion,
    ContainsJsonAssertion,
    # performance
    CostAssertion,
    LatencyAssertion,
    # linguistic
    LevenshteinAssertion,
    BleuAssertion,
    RougeNAssertion,
]

# Import-time side effect: register each class under its own ``type`` name.
for _cls in _ALL_ASSERTIONS:
    register_assertion(_cls.type, _cls)
48
+
49
+ __all__ = [
50
+ "BleuAssertion",
51
+ "ContainsAllAssertion",
52
+ "ContainsAnyAssertion",
53
+ "ContainsAssertion",
54
+ "ContainsJsonAssertion",
55
+ "CostAssertion",
56
+ "EqualsAssertion",
57
+ "IContainsAssertion",
58
+ "IsJsonAssertion",
59
+ "LatencyAssertion",
60
+ "LevenshteinAssertion",
61
+ "RegexAssertion",
62
+ "RougeNAssertion",
63
+ "StartsWithAssertion",
64
+ "WordCountAssertion",
65
+ ]
@@ -0,0 +1,129 @@
1
+ """Deterministic linguistic assertion plugins.
2
+
3
+ Covers: levenshtein, bleu, rouge-n.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import math
9
+ from collections import Counter
10
+ from typing import Any
11
+
12
+ import editdistance
13
+ from rouge_score import rouge_scorer
14
+
15
+ from runsight_core.assertions.base import AssertionContext, GradingResult
16
+
17
+
18
class LevenshteinAssertion:
    """Pass when the edit distance to the expected value is <= threshold."""

    type = "levenshtein"

    def __init__(self, value: Any = "", threshold: float | None = None) -> None:
        # The expected value is always compared as a string.
        self.value = str(value)
        # With no explicit threshold, allow a distance of up to 5.
        self.threshold = 5 if threshold is None else threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Grade ``output`` by its edit distance from ``self.value``.

        ``context`` is not consulted. The score is binary: 1.0 on pass,
        0.0 on fail.
        """
        edits = editdistance.eval(output, self.value)
        within_limit = edits <= self.threshold
        return GradingResult(
            passed=within_limit,
            score=1.0 if within_limit else 0.0,
            reason=f"Levenshtein distance is {edits} (threshold {self.threshold})",
        )
33
+
34
+
35
class BleuAssertion:
    """Pass when the BLEU-4 score is >= threshold (inline BLEU, no nltk)."""

    type = "bleu"

    def __init__(self, value: Any = "", threshold: float | None = None) -> None:
        # The reference text is always compared as a string.
        self.value = str(value)
        # With no explicit threshold, require a score of at least 0.5.
        self.threshold = 0.5 if threshold is None else threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Grade ``output`` against ``self.value`` using ``_compute_bleu``.

        ``context`` is not consulted. The raw BLEU value is reported as the
        score.
        """
        bleu = _compute_bleu(reference=self.value, candidate=output)
        meets_threshold = bleu >= self.threshold
        comparator = ">=" if meets_threshold else "<"
        return GradingResult(
            passed=meets_threshold,
            score=bleu,
            reason=f"BLEU score {bleu:.4f} {comparator} threshold {self.threshold}",
        )
51
+
52
+
53
class RougeNAssertion:
    """Pass when the ROUGE score is >= threshold (rouge-score library).

    The score used is the ``rouge1`` F-measure computed without stemming.
    """

    type = "rouge-n"

    def __init__(self, value: Any = "", threshold: float | None = None) -> None:
        # The reference text is always compared as a string.
        self.value = str(value)
        # With no explicit threshold, require a score of at least 0.75.
        self.threshold = 0.75 if threshold is None else threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Grade ``output`` against ``self.value``.

        ``context`` is not consulted. An empty output or an empty reference
        scores 0.0 without invoking the scorer.
        """
        if output and self.value:
            scorer = rouge_scorer.RougeScorer(["rouge1"], use_stemmer=False)
            rouge = scorer.score(self.value, output)["rouge1"].fmeasure
        else:
            rouge = 0.0

        meets_threshold = rouge >= self.threshold
        comparator = ">=" if meets_threshold else "<"
        return GradingResult(
            passed=meets_threshold,
            score=rouge,
            reason=f"ROUGE-N score {rouge:.4f} {comparator} threshold {self.threshold}",
        )
76
+
77
+
78
+ # ── Inline BLEU-4 implementation ────────────────────────────────────────────
79
+
80
+
81
+ def _get_ngrams(tokens: list[str], n: int) -> Counter[tuple[str, ...]]:
82
+ """Extract n-grams from a token list."""
83
+ return Counter(tuple(tokens[i : i + n]) for i in range(len(tokens) - n + 1))
84
+
85
+
86
+ def _compute_bleu(reference: str, candidate: str, max_n: int = 4) -> float:
87
+ """Compute BLEU score with smoothing (method 1: add 1 to numerator/denominator).
88
+
89
+ Ported from promptfoo's BLEU implementation.
90
+ """
91
+ ref_tokens = reference.lower().split()
92
+ cand_tokens = candidate.lower().split()
93
+
94
+ if not cand_tokens:
95
+ return 0.0
96
+ if not ref_tokens:
97
+ return 0.0
98
+
99
+ # Brevity penalty
100
+ bp = 1.0
101
+ if len(cand_tokens) < len(ref_tokens):
102
+ bp = math.exp(1.0 - len(ref_tokens) / len(cand_tokens))
103
+
104
+ # Modified precision for each n-gram order with smoothing
105
+ log_avg = 0.0
106
+ for n in range(1, max_n + 1):
107
+ ref_ngrams = _get_ngrams(ref_tokens, n)
108
+ cand_ngrams = _get_ngrams(cand_tokens, n)
109
+
110
+ # Clipped counts
111
+ clipped = 0
112
+ total = 0
113
+ for ngram, count in cand_ngrams.items():
114
+ clipped += min(count, ref_ngrams.get(ngram, 0))
115
+ total += count
116
+
117
+ # Smoothing: add 1 to both numerator and denominator when n > 1
118
+ if n == 1:
119
+ if total == 0:
120
+ return 0.0
121
+ precision = clipped / total
122
+ if precision == 0:
123
+ return 0.0
124
+ else:
125
+ precision = (clipped + 1) / (total + 1)
126
+
127
+ log_avg += math.log(precision) / max_n
128
+
129
+ return bp * math.exp(log_avg)
@@ -0,0 +1,58 @@
1
+ """Deterministic performance assertion plugins.
2
+
3
+ Covers: cost, latency.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+ from runsight_core.assertions.base import AssertionContext, GradingResult
11
+
12
+
13
class CostAssertion:
    """Pass when the context's ``cost_usd`` does not exceed the threshold."""

    type = "cost"

    def __init__(self, value: Any = None, threshold: float | None = None) -> None:
        # ``value`` is stored but not read by ``evaluate``.
        self.value = value
        self.threshold = threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Compare ``context.cost_usd`` with the threshold.

        ``output`` is not consulted. A missing threshold is treated as 0.0.
        The score is binary: 1.0 on pass, 0.0 on fail.
        """
        spent = context.cost_usd
        limit = 0.0 if self.threshold is None else self.threshold
        within_limit = spent <= limit
        if within_limit:
            reason = f"Cost ${spent:.4f} is within threshold ${limit:.4f}"
        else:
            reason = f"Cost ${spent:.4f} exceeds threshold ${limit:.4f}"
        return GradingResult(
            passed=within_limit,
            score=1.0 if within_limit else 0.0,
            reason=reason,
        )
34
+
35
+
36
class LatencyAssertion:
    """Pass when the context's ``latency_ms`` does not exceed the threshold."""

    type = "latency"

    def __init__(self, value: Any = None, threshold: float | None = None) -> None:
        # ``value`` is stored but not read by ``evaluate``.
        self.value = value
        self.threshold = threshold

    def evaluate(self, output: str, context: AssertionContext) -> GradingResult:
        """Compare ``context.latency_ms`` with the threshold.

        ``output`` is not consulted. A missing threshold is treated as 0.0.
        The score is binary: 1.0 on pass, 0.0 on fail.
        """
        elapsed = context.latency_ms
        limit = 0.0 if self.threshold is None else self.threshold
        within_limit = elapsed <= limit
        if within_limit:
            reason = f"Latency {elapsed:.1f}ms is within threshold {limit:.1f}ms"
        else:
            reason = f"Latency {elapsed:.1f}ms exceeds threshold {limit:.1f}ms"
        return GradingResult(
            passed=within_limit,
            score=1.0 if within_limit else 0.0,
            reason=reason,
        )