intuned-agent-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- intuned_agent_mcp-0.1.0/.env +12 -0
- intuned_agent_mcp-0.1.0/.env.tpl +12 -0
- intuned_agent_mcp-0.1.0/.gitignore +5 -0
- intuned_agent_mcp-0.1.0/.python-version +1 -0
- intuned_agent_mcp-0.1.0/PKG-INFO +73 -0
- intuned_agent_mcp-0.1.0/README.md +49 -0
- intuned_agent_mcp-0.1.0/RELEASING.md +144 -0
- intuned_agent_mcp-0.1.0/pyproject.toml +93 -0
- intuned_agent_mcp-0.1.0/scripts/compare_experiments.py +168 -0
- intuned_agent_mcp-0.1.0/scripts/dev-inspector.py +321 -0
- intuned_agent_mcp-0.1.0/scripts/dump_tool_schemas.py +72 -0
- intuned_agent_mcp-0.1.0/scripts/release.py +115 -0
- intuned_agent_mcp-0.1.0/scripts/run_selector_experiment.py +221 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/__init__.py +21 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/__main__.py +225 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/control_server.py +159 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/__init__.py +11 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/__init__.py +9 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/custom_tool_node.py +152 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/graph.py +257 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/submit_tools.py +209 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/types.py +28 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/__init__.py +19 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/correctness_test_utils.py +199 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/example_node_utils.py +221 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/feedback_utils.py +174 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/nearby_anchors/__init__.py +7 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/nearby_anchors/find_nearby_anchors.js +197 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/nearby_anchors/find_nearby_anchors.py +63 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/agent/utils/reliability_test_utils.py +90 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/deterministic_strategies/__init__.py +5 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/deterministic_strategies/ancestor_table_strategy.py +122 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/deterministic_strategies/ancestor_table_strategy_utils.js +109 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/deterministic_strategies/deterministic_strategies.py +65 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/llm_judges.py +146 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/prompt.py +325 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/run.py +99 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/selector_candidates_manager.py +83 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/reliable_selector_builder/types.py +86 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/server.py +172 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/__init__.py +120 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/browser_scripts/__init__.py +9 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/browser_scripts/browser_scripts.js +1173 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/browser_scripts/tests/__init__.py +0 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/browser_scripts/tests/test_element_ids_scripts.py +590 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/browser_scripts/utils.py +25 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/__init__.py +15 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/browser_manager.py +220 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/network_tracker.py +293 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/tests/__init__.py +0 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/tests/fixtures/opens_new_tab.html +14 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/tests/fixtures/simple.html +8 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/tests/fixtures/with_image.html +10 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/tests/test_eager_tracking.py +166 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/tests/test_network_tracker.py +161 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/cdp_utils/utils.py +49 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/conftest.py +72 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/__init__.py +19 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/constants.py +5 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/inspect_node.py +470 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/readme.md +42 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/scrollable_view.py +265 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/tests/__init__.py +1 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/tests/fixtures/test.html +104 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/tests/fixtures/test_with_dynamic_iframe.html +58 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/tests/fixtures/test_with_iframes.html +46 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/tests/test_inspection_node.py +412 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/tests/test_inspection_node_to_full_string.py +322 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/dom_inspector/tests/test_scrollable_view.py +334 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/__init__.py +37 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/element_locator_chain.py +80 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/find_element_by_element_id.py +120 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/page_utils.py +100 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/tests/__init__.py +0 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/tests/test_find_element_by_element_id.py +186 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/tests/test_page_utils.py +171 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/element_id_utils/utils.py +81 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/errors.py +15 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/__init__.py +27 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/check_frame_allows_async_scripts.py +33 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/find_frame_and_relative_coordinates.py +130 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/frame_tree.py +192 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/get_container_frame.py +35 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/stitch_iframe.py +284 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/tests/__init__.py +1 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/tests/test_find_all_iframes.py +881 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/tests/test_find_frame_and_relative_coordinates.py +117 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/tests/test_stitch_iframe.py +583 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/frame_utils/utils.py +145 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/image_utils.py +18 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/__init__.py +27 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/caching_chat_anthropic.py +134 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/chat_gemini.py +374 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/cli_gateway.py +67 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/get_chat_model.py +164 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/test_chat_gemini.py +912 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/test_get_chat_model.py +298 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/langchain_utils/token_utils.py +66 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/lazy_traceable.py +42 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/__init__.py +25 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/tests/__init__.py +1 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/tests/test_filtering.py +75 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/tests/test_is_match_exact.py +30 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/tests/test_normalize_spacing.py +24 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/tests/test_rank_match.py +11 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/tests/test_remove_punctuation_and_spaces.py +24 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/tests/test_select_best_match.py +96 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/types.py +28 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/matching/utils.py +272 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/page_dom_file.py +83 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/screenshot_utils.py +110 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/tests/__init__.py +0 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/tests/test_artifacts_hooks.py +841 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/tests/test_compact_platform_runs_get.py +251 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/tests/test_compact_result.py +302 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/tests/test_compact_test_job_download.py +730 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/tests/test_screenshot_utils.py +142 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/tool_utils.py +85 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/types/__init__.py +7 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/shared/types/selector.py +59 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tests/test_hidden_params.py +180 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tests/test_server.py +51 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/__init__.py +31 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/__init__.py +15 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/find.py +429 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/find_array_container.py +246 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/find_container_helpers.py +182 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/inspect_element.py +91 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/query_by_selector.py +220 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/__init__.py +1 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/fixtures/test_matches.html +153 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/fixtures/test_two_containers.html +61 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/fixtures/test_window_size.html +146 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/fixtures/test_with_iframes.html +69 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/fixtures/test_with_iframes_dynamic.html +80 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/test_find_aria_patching.py +459 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/test_find_container_helpers.py +369 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/test_inspect_element.py +377 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/test_query_by_selector.py +340 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/test_query_by_strings.py +323 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/browser_tools/tests/test_scroll_into_view.py +266 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/captcha_tools/__init__.py +7 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/captcha_tools/wait_for_captcha_solve.py +174 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/__init__.py +15 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/computer.py +948 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/form_input.py +257 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/navigate.py +115 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/scroll_into_view.py +80 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/__init__.py +1 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/conftest.py +84 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/fixtures/scrollbars_test.html +305 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/fixtures/test.html +152 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/test_computer.py +291 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/test_computer_events.py +172 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/test_form_input.py +393 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/test_navigate.py +111 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/test_scroll_tracking.py +397 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/tests/test_utils.py +343 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/computer_tools/utils.py +505 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/__init__.py +7 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/build_field_selector.py +202 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/build_reliable_selector.py +225 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/selector_agent_plan.py +192 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/tests/__init__.py +0 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/tests/test_iframe_selector_tools.py +400 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/tests/test_selector_agent_plan.py +501 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/types.py +66 -0
- intuned_agent_mcp-0.1.0/src/intuned_agent_mcp/tools/selector_tools/utils.py +69 -0
- intuned_agent_mcp-0.1.0/uv.lock +3245 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Generated by `yarn local:setup:*` from .env.tpl. Do not edit by hand.
|
|
2
|
+
# CLAUDE_SDK_WORKING_DIRECTORY is injected by the Claude Agent SDK at spawn.
|
|
3
|
+
|
|
4
|
+
# Prevent litellm from fetching remote model cost data at import time
|
|
5
|
+
LITELLM_LOCAL_MODEL_COST_MAP=True
|
|
6
|
+
|
|
7
|
+
ANTHROPIC_API_KEY="<REDACTED>"
|
|
8
|
+
GEMINI_API_KEY="<REDACTED>"
|
|
9
|
+
|
|
10
|
+
LANGSMITH_TRACING=true
|
|
11
|
+
LANGSMITH_API_KEY="<REDACTED>"
|
|
12
|
+
LANGSMITH_PROJECT=intuned-agent-mcp
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Generated by `yarn local:setup:*` from .env.tpl. Do not edit by hand.
|
|
2
|
+
# CLAUDE_SDK_WORKING_DIRECTORY is injected by the Claude Agent SDK at spawn.
|
|
3
|
+
|
|
4
|
+
# Prevent litellm from fetching remote model cost data at import time
|
|
5
|
+
LITELLM_LOCAL_MODEL_COST_MAP=True
|
|
6
|
+
|
|
7
|
+
ANTHROPIC_API_KEY={{ANTHROPIC_API_KEY}}
|
|
8
|
+
GEMINI_API_KEY={{GEMINI_API_KEY}}
|
|
9
|
+
|
|
10
|
+
LANGSMITH_TRACING=true
|
|
11
|
+
LANGSMITH_API_KEY={{LANGSMITH_API_KEY}}
|
|
12
|
+
LANGSMITH_PROJECT=intuned-agent-mcp
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: intuned-agent-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server exposing browser automation tools via intuned-runtime
|
|
5
|
+
Requires-Python: <4.0,>=3.12
|
|
6
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
7
|
+
Requires-Dist: httpx>=0.27.0
|
|
8
|
+
Requires-Dist: intuned-browser
|
|
9
|
+
Requires-Dist: intuned-runtime>=1.3.24
|
|
10
|
+
Requires-Dist: langchain-anthropic>=0.2.0
|
|
11
|
+
Requires-Dist: langchain>=0.3.0
|
|
12
|
+
Requires-Dist: langgraph>=0.2.0
|
|
13
|
+
Requires-Dist: langsmith>=0.1.0
|
|
14
|
+
Requires-Dist: lxml>=5.0
|
|
15
|
+
Requires-Dist: mcp<2.0,>=1.0
|
|
16
|
+
Requires-Dist: playwright<2,>=1.58.0
|
|
17
|
+
Requires-Dist: pydantic>=2.0
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pyright>=1.1.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
22
|
+
Requires-Dist: ruff>=0.7.0; extra == 'dev'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# Intuned Agent MCP Server
|
|
26
|
+
|
|
27
|
+
An MCP server exposing browser automation tools: navigation, DOM inspection, element finding, and computer-use actions (click, type, scroll, screenshot). It speaks MCP over stdio.
|
|
28
|
+
|
|
29
|
+
## Run with uvx
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
uvx intuned-agent-mcp
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
To use it from an MCP client, point the client at the `uvx` command:
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"mcpServers": {
|
|
40
|
+
"intuned-agent-mcp": {
|
|
41
|
+
"command": "uvx",
|
|
42
|
+
"args": ["intuned-agent-mcp"]
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Tools
|
|
49
|
+
|
|
50
|
+
| Tool | Description |
|
|
51
|
+
| ---------------------- | --------------------------------------------------------------- |
|
|
52
|
+
| `navigate` | Navigate to a URL, or go back/forward in browser history |
|
|
53
|
+
| `find` | Find elements using natural language descriptions or exact text |
|
|
54
|
+
| `query_by_selector` | Search for DOM elements using CSS selector or XPath |
|
|
55
|
+
| `find_array_container` | Find the DOM container holding a collection of related items |
|
|
56
|
+
| `inspect_element` | Inspect a specific element and return a collapsed DOM preview |
|
|
57
|
+
| `scroll_into_view` | Scroll an element into the viewport |
|
|
58
|
+
| `computer` | Mouse clicks, keyboard input, scrolling, and screenshots |
|
|
59
|
+
| `select_option` | Select an option from a dropdown or select element |
|
|
60
|
+
|
|
61
|
+
## Configuration
|
|
62
|
+
|
|
63
|
+
Logging is off by default. To enable it, set environment variables:
|
|
64
|
+
|
|
65
|
+
| Variable | Description |
|
|
66
|
+
| -------------- | ------------------------------------------------------ |
|
|
67
|
+
| `MCP_LOG_FILE` | Path to write logs to. Logs are disabled when not set. |
|
|
68
|
+
| `MCP_DEBUG` | Set to `1` to log at `DEBUG` level (default `INFO`). |
|
|
69
|
+
|
|
70
|
+
## Requirements
|
|
71
|
+
|
|
72
|
+
- Python 3.12+
|
|
73
|
+
- A Chromium-based browser reachable over the Chrome DevTools Protocol. Browser-driving tools connect to an existing browser over CDP.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Intuned Agent MCP Server
|
|
2
|
+
|
|
3
|
+
An MCP server exposing browser automation tools: navigation, DOM inspection, element finding, and computer-use actions (click, type, scroll, screenshot). It speaks MCP over stdio.
|
|
4
|
+
|
|
5
|
+
## Run with uvx
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
uvx intuned-agent-mcp
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
To use it from an MCP client, point the client at the `uvx` command:
|
|
12
|
+
|
|
13
|
+
```json
|
|
14
|
+
{
|
|
15
|
+
"mcpServers": {
|
|
16
|
+
"intuned-agent-mcp": {
|
|
17
|
+
"command": "uvx",
|
|
18
|
+
"args": ["intuned-agent-mcp"]
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Tools
|
|
25
|
+
|
|
26
|
+
| Tool | Description |
|
|
27
|
+
| ---------------------- | --------------------------------------------------------------- |
|
|
28
|
+
| `navigate` | Navigate to a URL, or go back/forward in browser history |
|
|
29
|
+
| `find` | Find elements using natural language descriptions or exact text |
|
|
30
|
+
| `query_by_selector` | Search for DOM elements using CSS selector or XPath |
|
|
31
|
+
| `find_array_container` | Find the DOM container holding a collection of related items |
|
|
32
|
+
| `inspect_element` | Inspect a specific element and return a collapsed DOM preview |
|
|
33
|
+
| `scroll_into_view` | Scroll an element into the viewport |
|
|
34
|
+
| `computer` | Mouse clicks, keyboard input, scrolling, and screenshots |
|
|
35
|
+
| `select_option` | Select an option from a dropdown or select element |
|
|
36
|
+
|
|
37
|
+
## Configuration
|
|
38
|
+
|
|
39
|
+
Logging is off by default. To enable it, set environment variables:
|
|
40
|
+
|
|
41
|
+
| Variable | Description |
|
|
42
|
+
| -------------- | ------------------------------------------------------ |
|
|
43
|
+
| `MCP_LOG_FILE` | Path to write logs to. Logs are disabled when not set. |
|
|
44
|
+
| `MCP_DEBUG` | Set to `1` to log at `DEBUG` level (default `INFO`). |
|
|
45
|
+
|
|
46
|
+
## Requirements
|
|
47
|
+
|
|
48
|
+
- Python 3.12+
|
|
49
|
+
- A Chromium-based browser reachable over the Chrome DevTools Protocol. Browser-driving tools connect to an existing browser over CDP.
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# Releasing `intuned-agent-mcp`
|
|
2
|
+
|
|
3
|
+
Internal guide for publishing the browser-automation MCP server to PyPI so it can
|
|
4
|
+
be run with `uvx intuned-agent-mcp`. The `intuned` Claude Code plugin launches the
|
|
5
|
+
server this way (see the plugin's `.mcp.json`).
|
|
6
|
+
|
|
7
|
+
## Quick release (script)
|
|
8
|
+
|
|
9
|
+
`scripts/release.py` bumps the version, builds, and publishes in one step:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# dev -> TestPyPI
|
|
13
|
+
uv run scripts/release.py dev 0.1.1.dev0
|
|
14
|
+
|
|
15
|
+
# prod -> PyPI
|
|
16
|
+
uv run scripts/release.py prod 0.1.1
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Publishing goes through `twine`, which reads credentials from `~/.pypirc`
|
|
20
|
+
(`[pypi]` for prod, `[testpypi]` for dev; username `__token__`, password your API
|
|
21
|
+
token). The script prints the matching smoke-test command when it finishes. The
|
|
22
|
+
rest of this doc explains each step and the one-time account setup.
|
|
23
|
+
|
|
24
|
+
## What makes it runnable via uvx
|
|
25
|
+
|
|
26
|
+
Three things in `pyproject.toml` must stay intact:
|
|
27
|
+
|
|
28
|
+
- `name = "intuned-agent-mcp"` and a bumped `version`.
|
|
29
|
+
- `readme = "README.md"` so PyPI renders a page.
|
|
30
|
+
- A console-script entry point:
|
|
31
|
+
```toml
|
|
32
|
+
[project.scripts]
|
|
33
|
+
intuned-agent-mcp = "intuned_agent_mcp.__main__:main"
|
|
34
|
+
```
|
|
35
|
+
This is what `uvx intuned-agent-mcp` resolves to. Without it there is no command
|
|
36
|
+
to run.
|
|
37
|
+
|
|
38
|
+
Dependencies (`intuned-runtime`, `intuned-browser`, `playwright`, `mcp`,
|
|
39
|
+
`langchain*`, etc.) all resolve from public PyPI.
|
|
40
|
+
|
|
41
|
+
## Prerequisites
|
|
42
|
+
|
|
43
|
+
- `uv` on `PATH` (`uv --version`).
|
|
44
|
+
- A PyPI account + API token, and a TestPyPI account + API token (separate sites,
|
|
45
|
+
separate accounts). 2FA must be enabled before you can create tokens.
|
|
46
|
+
- For a brand-new project, the first upload needs an **account-scoped** token
|
|
47
|
+
(a project-scoped token cannot exist until the project does). Rotate to a
|
|
48
|
+
project-scoped token after the first publish.
|
|
49
|
+
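- For the manual `uv publish` steps below, tokens are passed via `--token pypi-...` (the release script uses `twine` with `~/.pypirc` instead). Do not commit them, and keep files that hold secrets (such as `.env`) out of the built sdist.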
|
|
50
|
+
|
|
51
|
+
## 1. Bump the version
|
|
52
|
+
|
|
53
|
+
Edit `version` in `pyproject.toml` (e.g. `0.1.0` -> `0.1.1`). PyPI refuses to
|
|
54
|
+
re-upload an existing version, so every publish needs a new number.
|
|
55
|
+
|
|
56
|
+
## 2. Build
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
cd apps/intuned-agent-mcp
|
|
60
|
+
rm -rf dist && uv build
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
This produces `dist/intuned_agent_mcp-<version>.tar.gz` (sdist) and
|
|
64
|
+
`...-py3-none-any.whl` (wheel). Quick sanity checks:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
unzip -p dist/*.whl '*/entry_points.txt' # should show the intuned-agent-mcp console_script
|
|
68
|
+
unzip -p dist/*.whl '*/METADATA' | grep -i description-content-type # text/markdown
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## 3. Publish to TestPyPI first
|
|
72
|
+
|
|
73
|
+
The `testpypi` index is preconfigured in `pyproject.toml` with both an install
|
|
74
|
+
`url` and a `publish-url`:
|
|
75
|
+
|
|
76
|
+
```toml
|
|
77
|
+
[[tool.uv.index]]
|
|
78
|
+
name = "testpypi"
|
|
79
|
+
url = "https://test.pypi.org/simple/"
|
|
80
|
+
publish-url = "https://test.pypi.org/legacy/"
|
|
81
|
+
explicit = true
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Publish:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
uv publish --index testpypi --token pypi-<TESTPYPI_TOKEN>
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Smoke-test the published artifact. TestPyPI does not mirror the dependencies, so
|
|
91
|
+
point uv at TestPyPI for the package and let it fall through to real PyPI for the
|
|
92
|
+
rest:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
uvx --index https://test.pypi.org/simple/ --index-strategy unsafe-best-match intuned-agent-mcp
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
It should start and block on stdin (it is an MCP stdio server). No traceback means
|
|
99
|
+
the entry point and packaging are good. Ctrl+C to exit.
|
|
100
|
+
|
|
101
|
+
## 4. Publish to real PyPI
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uv publish --token pypi-<PYPI_TOKEN>
|
|
105
|
+
uvx intuned-agent-mcp # final smoke test, no index flags needed
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## 5. Post-publish: simplify the plugin config
|
|
109
|
+
|
|
110
|
+
While testing, the plugin's `.mcp.json` resolves the server from TestPyPI:
|
|
111
|
+
|
|
112
|
+
```json
|
|
113
|
+
"browser": {
|
|
114
|
+
"command": "uvx",
|
|
115
|
+
"args": [
|
|
116
|
+
"--index", "https://test.pypi.org/simple/",
|
|
117
|
+
"--index-strategy", "unsafe-best-match",
|
|
118
|
+
"intuned-agent-mcp"
|
|
119
|
+
]
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Once the version is on real PyPI, drop the index flags:
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
"browser": { "command": "uvx", "args": ["intuned-agent-mcp"] }
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
The plugin lives in the `Intuned/skills` repo under `intuned-plugin/.mcp.json`.
|
|
130
|
+
Consider pinning the version there (`intuned-agent-mcp==<version>`) so a later
|
|
131
|
+
publish does not silently change plugin behavior.
|
|
132
|
+
|
|
133
|
+
## Notes
|
|
134
|
+
|
|
135
|
+
- **Playwright browsers are not bundled.** `uvx` installs the `playwright` wheel
|
|
136
|
+
but not the browser binaries. The plugin drives an existing browser over CDP, so
|
|
137
|
+
this is fine in that flow; a standalone user would need `playwright install`.
|
|
138
|
+
- **Logging is off by default.** Set `MCP_LOG_FILE=/path` (and optionally
|
|
139
|
+
`MCP_DEBUG=1`) to write logs to a file. The server never logs to stderr.
|
|
140
|
+
- **First-run latency.** `uvx` resolves and caches the dependency tree on first
|
|
141
|
+
launch, so the first start is slow; later starts are cached.
|
|
142
|
+
- `[tool.uv] exclude-newer` in `pyproject.toml` pins the resolver to packages
|
|
143
|
+
published before a cutoff for reproducibility; bump or remove it if a needed
|
|
144
|
+
dependency release is newer than the window.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "intuned-agent-mcp"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP server exposing browser automation tools via intuned-runtime"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12,<4.0"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"intuned-browser",
|
|
9
|
+
"intuned-runtime>=1.3.24",
|
|
10
|
+
"playwright>=1.58.0,<2",
|
|
11
|
+
"mcp>=1.0,<2.0",
|
|
12
|
+
"pydantic>=2.0",
|
|
13
|
+
"beautifulsoup4>=4.12",
|
|
14
|
+
"lxml>=5.0",
|
|
15
|
+
# LangChain dependencies for selector tools
|
|
16
|
+
"langgraph>=0.2.0",
|
|
17
|
+
"langchain>=0.3.0",
|
|
18
|
+
"langchain-anthropic>=0.2.0",
|
|
19
|
+
"httpx>=0.27.0",
|
|
20
|
+
"langsmith>=0.1.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
intuned-agent-mcp = "intuned_agent_mcp.__main__:main"
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=8.0",
|
|
29
|
+
"pytest-asyncio>=0.23",
|
|
30
|
+
"ruff>=0.7.0",
|
|
31
|
+
"pyright>=1.1.0",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[tool.pytest.ini_options]
|
|
35
|
+
asyncio_mode = "auto"
|
|
36
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
37
|
+
testpaths = ["src"]
|
|
38
|
+
python_files = ["test_*.py"]
|
|
39
|
+
python_classes = ["Test*"]
|
|
40
|
+
python_functions = ["test_*"]
|
|
41
|
+
markers = [
|
|
42
|
+
"e2e: marks tests as end-to-end (deselect with '-m \"not e2e\"')",
|
|
43
|
+
"headed: marks tests that require a headed browser",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[tool.ruff]
|
|
47
|
+
target-version = "py312"
|
|
48
|
+
line-length = 120
|
|
49
|
+
fix = false
|
|
50
|
+
show-fixes = true
|
|
51
|
+
|
|
52
|
+
[tool.ruff.format]
|
|
53
|
+
quote-style = "double"
|
|
54
|
+
indent-style = "space"
|
|
55
|
+
line-ending = "auto"
|
|
56
|
+
|
|
57
|
+
[tool.ruff.lint]
|
|
58
|
+
select = ["B", "E", "F", "I", "UP", "W", "N", "PTH", "C4", "ASYNC", "T20"]
|
|
59
|
+
ignore = ["E501", "E722", "ASYNC109"]
|
|
60
|
+
|
|
61
|
+
[tool.ruff.lint.per-file-ignores]
|
|
62
|
+
"**/test_*.py" = ["T20"]
|
|
63
|
+
"**/tests/*.py" = ["T20"]
|
|
64
|
+
|
|
65
|
+
[tool.ruff.lint.isort]
|
|
66
|
+
force-single-line = true
|
|
67
|
+
order-by-type = false
|
|
68
|
+
|
|
69
|
+
[tool.pyright]
|
|
70
|
+
pythonVersion = "3.12"
|
|
71
|
+
typeCheckingMode = "basic"
|
|
72
|
+
|
|
73
|
+
[build-system]
|
|
74
|
+
requires = ["hatchling"]
|
|
75
|
+
build-backend = "hatchling.build"
|
|
76
|
+
|
|
77
|
+
[dependency-groups]
|
|
78
|
+
dev = [
|
|
79
|
+
"pytest>=9.0.2",
|
|
80
|
+
"pytest-asyncio>=1.3.0",
|
|
81
|
+
]
|
|
82
|
+
|
|
83
|
+
[tool.hatch.build.targets.wheel]
|
|
84
|
+
packages = ["src/intuned_agent_mcp"]
|
|
85
|
+
|
|
86
|
+
[[tool.uv.index]]
|
|
87
|
+
name = "testpypi"
|
|
88
|
+
url = "https://test.pypi.org/simple/"
|
|
89
|
+
publish-url = "https://test.pypi.org/legacy/"
|
|
90
|
+
explicit = true
|
|
91
|
+
|
|
92
|
+
[tool.uv]
|
|
93
|
+
exclude-newer = "7d"
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# ruff: noqa: T201 # CLI script — print() is intentional
|
|
2
|
+
#!/usr/bin/env python3
|
|
3
|
+
"""
|
|
4
|
+
Compare two LangSmith experiments from the reliable-selector-builder dataset, side by side.
|
|
5
|
+
|
|
6
|
+
`--a` and `--b` accept either a full experiment name or just the short preset prefix
|
|
7
|
+
(matching the `short` field in MODEL_PRESETS). When a prefix is passed, the split name
|
|
8
|
+
gets appended automatically, so a prefix like `flash-haikufb` combined with
|
|
9
|
+
`--split hard` resolves to the newest project matching `flash-haikufb-hard`.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
uv run python scripts/compare_experiments.py --split smoke
|
|
13
|
+
uv run python scripts/compare_experiments.py --a haiku --b flash-haikufb --split hard
|
|
14
|
+
uv run python scripts/compare_experiments.py --a haiku-haiku-smoke-fb374d62 --b flash-haikufb-smoke-4c716e4d
|
|
15
|
+
|
|
16
|
+
Env vars (same as run_selector_experiment.py): LANGSMITH_API_KEY in .env.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from dotenv import load_dotenv
|
|
24
|
+
|
|
25
|
+
# LangSmith dataset id passed to `client.list_examples(...)` in main().
DATASET_ID = "fae8988e-2753-4aef-bb53-e2f9b0120a36"
# Defaults for the `--a` and `--b` experiment arguments (full name or short prefix).
DEFAULT_A_PREFIX = "haiku"
DEFAULT_B_PREFIX = "flash-haikufb"
# Default for the `--split` argument.
DEFAULT_SPLIT = "smoke"
# Number of "=" characters in the rule lines printed around the report.
SEPARATOR_WIDTH = 110
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class RunRecord:
    """Per-example summary of a single experiment run, as built by `_load_runs`."""

    # Raw `outputs` of the run; `_extract_selectors` reads its `selectors` key.
    outputs: dict[str, Any] | None
    # The run's `error` value, copied as-is.
    error: str | None
    # Seconds between the run's start and end times; None when the run has no end time.
    latency: float | None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _extract_selectors(container: dict[str, Any] | None) -> list[tuple[str, str]]:
|
|
40
|
+
"""Extract [(type, value), ...] from an object with a `selectors` key.
|
|
41
|
+
|
|
42
|
+
Used for both dataset reference outputs and experiment run outputs.
|
|
43
|
+
"""
|
|
44
|
+
if not container:
|
|
45
|
+
return []
|
|
46
|
+
return [(s.get("type", ""), s.get("value", "")) for s in container.get("selectors", [])]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _resolve_project(client, name_or_prefix: str) -> str:
|
|
50
|
+
"""Return the exact project name for a hit in LangSmith.
|
|
51
|
+
|
|
52
|
+
Exact match wins if found; otherwise the most recently started project whose
|
|
53
|
+
name contains the prefix is used.
|
|
54
|
+
"""
|
|
55
|
+
candidates = list(client.list_projects(name_contains=name_or_prefix))
|
|
56
|
+
if not candidates:
|
|
57
|
+
raise SystemExit(f"No LangSmith project matches {name_or_prefix!r}")
|
|
58
|
+
for p in candidates:
|
|
59
|
+
if p.name == name_or_prefix:
|
|
60
|
+
return p.name
|
|
61
|
+
candidates.sort(key=lambda p: getattr(p, "start_time", None) or p.id, reverse=True)
|
|
62
|
+
return candidates[0].name
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _load_runs(client, project_name: str) -> dict[str, RunRecord]:
    """Fetch a project's root runs and index them by reference example id.

    Runs with no `reference_example_id` are skipped. When several runs point at
    the same example, the one listed last replaces the earlier ones.
    """
    records: dict[str, RunRecord] = {}
    for run in list(client.list_runs(project_name=project_name, is_root=True)):
        example_id = getattr(run, "reference_example_id", None)
        if example_id is not None:
            records[str(example_id)] = RunRecord(
                outputs=run.outputs,
                error=run.error,
                # Latency is only defined once the run has an end time.
                latency=None if not run.end_time else (run.end_time - run.start_time).total_seconds(),
            )
    return records
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _fmt_latency(record: RunRecord | None) -> str:
|
|
82
|
+
if record is None or record.latency is None:
|
|
83
|
+
return "—"
|
|
84
|
+
return f"{record.latency:.1f}s"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _print_selectors(label: str, record: RunRecord | None, max_len: int) -> None:
    """Print one line per selector found in `record.outputs`, tagged with `label`.

    Prints a single `<missing>` line when there is no record. Selector values are
    truncated to `max_len` characters.
    """
    if record is not None:
        selectors = _extract_selectors(record.outputs)
        for position, (selector_type, selector_value) in enumerate(selectors):
            print(f" {label} [{position}]: {selector_type:5s} {selector_value[:max_len]}")
        return
    print(f" {label}: <missing>")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def main() -> None:
    """Compare two LangSmith experiments example by example and print a report.

    Parses `--a`, `--b`, `--split` and `--max-len`, resolves both experiment
    names to LangSmith projects, loads their root runs, then walks the examples
    of the chosen dataset split, printing the reference selectors next to each
    experiment's selectors. Ends with exact-match and agreement counts computed
    over the examples that have a run in both experiments.

    Raises:
        SystemExit: From `_resolve_project` when no project matches a name, or
            from argparse on invalid arguments.
    """
    load_dotenv()

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--a", default=DEFAULT_A_PREFIX, help="First experiment name or short prefix")
    parser.add_argument("--b", default=DEFAULT_B_PREFIX, help="Second experiment name or short prefix")
    parser.add_argument("--split", default=DEFAULT_SPLIT, help="Dataset split to compare")
    parser.add_argument("--max-len", type=int, default=140, help="Truncate selector strings to this length")
    args = parser.parse_args()

    # If the user passed a bare preset short (no `-<split>-<hash>` suffix), append the split
    # so they don't have to retype it for every run.
    def _with_split(name: str) -> str:
        return name if args.split in name else f"{name}-{args.split}"

    # Imported inside main(), i.e. only after load_dotenv() and argument parsing have run.
    from langsmith import Client

    client = Client()
    exp_a = _resolve_project(client, _with_split(args.a))
    exp_b = _resolve_project(client, _with_split(args.b))
    print(f"A = {exp_a}")
    print(f"B = {exp_b}")

    runs_a = _load_runs(client, exp_a)
    runs_b = _load_runs(client, exp_b)

    examples = list(client.list_examples(dataset_id=DATASET_ID, splits=[args.split]))
    print(f"Split {args.split}: {len(examples)} examples (A landed: {len(runs_a)}, B landed: {len(runs_b)})")
    print("=" * SEPARATOR_WIDTH)

    a_ok = b_ok = agree = both_landed = 0
    for i, ex in enumerate(examples, 1):
        ex_id = str(ex.id)
        ref = _extract_selectors(ex.outputs)
        ra, rb = runs_a.get(ex_id), runs_b.get(ex_id)

        inputs = ex.inputs or {}
        desc = (inputs.get("description") or "")[:100]
        # Same guard as `description`: an explicit null must not crash len()/iteration.
        candidates = inputs.get("candidates") or []
        ncands = len(candidates)
        has_container = any(c.get("container_full_xpath") for c in candidates)

        print(
            f"\n[{i:>2}] id={ex_id[:8]} cands={ncands} container={has_container} "
            f"lat A={_fmt_latency(ra)} B={_fmt_latency(rb)}"
        )
        print(f" desc: {desc}")
        for j, (kind, value) in enumerate(ref):
            print(f" REF [{j}]: {kind:5s} {value[: args.max_len]}")
        _print_selectors("A", ra, args.max_len)
        _print_selectors("B", rb, args.max_len)

        # Match statistics only make sense when both experiments produced a run.
        if ra is None or rb is None:
            continue
        both_landed += 1
        a_sel = _extract_selectors(ra.outputs)
        b_sel = _extract_selectors(rb.outputs)
        a_matches_ref = a_sel == ref
        b_matches_ref = b_sel == ref
        a_matches_b = a_sel == b_sel
        a_ok += int(a_matches_ref)
        b_ok += int(b_matches_ref)
        agree += int(a_matches_b)
        print(f" >> A==ref: {a_matches_ref} B==ref: {b_matches_ref} A==B: {a_matches_b}")

    print("\n" + "=" * SEPARATOR_WIDTH)
    print(f"Landed in both: {both_landed}/{len(examples)}")
    print(f"A exact-match vs ref: {a_ok}/{both_landed}")
    print(f"B exact-match vs ref: {b_ok}/{both_landed}")
    print(f"A == B selectors: {agree}/{both_landed}")
    print("(exact-match is a weak metric: the dataset ref is one valid selector among many)")
|
|
164
|
+
print("(exact-match is a weak metric: the dataset ref is one valid selector among many)")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Script entry point: run the comparison only when executed directly, not on import.
if __name__ == "__main__":
    main()
|