ha-mcp-dev 7.2.0.dev350__tar.gz → 7.2.0.dev351__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ha_mcp_dev-7.2.0.dev350/src/ha_mcp_dev.egg-info → ha_mcp_dev-7.2.0.dev351}/PKG-INFO +1 -1
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/pyproject.toml +1 -1
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/smart_search.py +98 -36
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/fuzzy_search.py +217 -33
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351/src/ha_mcp_dev.egg-info}/PKG-INFO +1 -1
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/LICENSE +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/MANIFEST.in +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/README.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/setup.cfg +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/__init__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/__main__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/_pypi_marker +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/auth/__init__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/auth/consent_form.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/auth/provider.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/__init__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/rest_client.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/websocket_client.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/websocket_listener.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/config.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/errors.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/py.typed +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.claude/settings.json +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.claude-plugin/marketplace.json +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.claude-plugin/plugin.json +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.github/ISSUE_TEMPLATE/skill-rca.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/AGENTS.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/CLAUDE.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/CONTRIBUTING.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/LICENSE +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/README.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/SKILL.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/evals/evals.json +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/automation-patterns.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/dashboard-cards.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/dashboard-guide.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/device-control.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/domain-docs.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/examples.yaml +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/helper-selection.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/safe-refactoring.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/template-guidelines.md +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/server.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/smoke_test.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/__init__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/backup.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/best_practice_checker.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/device_control.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/enhanced.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/helpers.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/registry.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_addons.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_areas.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_blueprints.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_bug_report.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_calendar.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_camera.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_categories.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_automations.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_dashboards.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_entry_flow.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_helpers.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_scripts.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_entities.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_filesystem.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_groups.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_hacs.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_history.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_integrations.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_labels.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_mcp_component.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_registry.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_resources.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_search.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_service.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_services.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_system.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_todo.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_traces.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_updates.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_utility.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_voice_assistant.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_yaml_config.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_zones.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/util_helpers.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/transforms/__init__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/transforms/categorized_search.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/__init__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/domain_handlers.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/operation_manager.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/python_sandbox.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/usage_logger.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/SOURCES.txt +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/dependency_links.txt +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/entry_points.txt +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/requires.txt +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/top_level.txt +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/tests/__init__.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/tests/test_constants.py +0 -0
- {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/tests/test_env_manager.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ha-mcp-dev"
|
|
7
|
-
version = "7.2.0.dev350"
|
|
7
|
+
version = "7.2.0.dev351"
|
|
8
8
|
description = "Home Assistant MCP Server - Complete control of Home Assistant through MCP"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.13,<3.14"
|
|
@@ -11,7 +11,13 @@ from typing import Any
|
|
|
11
11
|
|
|
12
12
|
from ..client.rest_client import HomeAssistantClient
|
|
13
13
|
from ..config import get_global_settings
|
|
14
|
-
from ..utils.fuzzy_search import
|
|
14
|
+
from ..utils.fuzzy_search import (
|
|
15
|
+
BM25Scorer,
|
|
16
|
+
calculate_partial_ratio,
|
|
17
|
+
calculate_ratio,
|
|
18
|
+
create_fuzzy_searcher,
|
|
19
|
+
tokenize,
|
|
20
|
+
)
|
|
15
21
|
from .helpers import exception_to_structured_error
|
|
16
22
|
|
|
17
23
|
logger = logging.getLogger(__name__)
|
|
@@ -1429,53 +1435,109 @@ class SmartSearchTools:
|
|
|
1429
1435
|
query: str,
|
|
1430
1436
|
exact_match: bool = False,
|
|
1431
1437
|
) -> int:
|
|
1432
|
-
"""
|
|
1433
|
-
Recursively search for query string in nested dictionary/list structures.
|
|
1438
|
+
"""Search for query in nested dictionary/list structures.
|
|
1434
1439
|
|
|
1435
1440
|
When exact_match is True, uses substring matching (returns 100 if found, 0 if not).
|
|
1436
|
-
When exact_match is False,
|
|
1441
|
+
When exact_match is False, collects all string leaves, tokenizes them into a
|
|
1442
|
+
single BM25 document, and scores against the query tokens. Falls back to
|
|
1443
|
+
token-level SequenceMatcher if BM25 returns 0 (typo correction).
|
|
1437
1444
|
"""
|
|
1438
|
-
|
|
1439
|
-
|
|
1445
|
+
if exact_match:
|
|
1446
|
+
return self._search_in_dict_exact(data, query)
|
|
1447
|
+
|
|
1448
|
+
# Fuzzy path: collect all string leaves, build a single tokenised document
|
|
1449
|
+
leaves: list[str] = []
|
|
1450
|
+
self._collect_string_leaves(data, leaves)
|
|
1451
|
+
if not leaves:
|
|
1452
|
+
return 0
|
|
1453
|
+
|
|
1454
|
+
query_tokens = tokenize(query)
|
|
1455
|
+
if not query_tokens:
|
|
1456
|
+
return 0
|
|
1457
|
+
|
|
1458
|
+
# Build a single flat token list from all leaves
|
|
1459
|
+
doc_tokens: list[str] = []
|
|
1460
|
+
for leaf in leaves:
|
|
1461
|
+
doc_tokens.extend(tokenize(leaf))
|
|
1462
|
+
|
|
1463
|
+
if not doc_tokens:
|
|
1464
|
+
return 0
|
|
1465
|
+
|
|
1466
|
+
# Use BM25 with a 1-document corpus (the config dict as a single doc)
|
|
1467
|
+
scorer = BM25Scorer()
|
|
1468
|
+
scorer.fit([doc_tokens])
|
|
1469
|
+
raw = scorer.score(query_tokens, 0)
|
|
1470
|
+
|
|
1471
|
+
if raw > 0:
|
|
1472
|
+
# Normalise against the theoretical max (sum of IDF per query
|
|
1473
|
+
# token). With a 1-document corpus every token's IDF is identical
|
|
1474
|
+
# (~0.288 with smoothing), so the ratio effectively measures how
|
|
1475
|
+
# many query tokens the config contains. Cap at 100 for the edge
|
|
1476
|
+
# case where high TF pushes raw above the sum-of-IDFs baseline.
|
|
1477
|
+
max_possible = scorer.max_possible_score(query_tokens)
|
|
1478
|
+
if max_possible > 0:
|
|
1479
|
+
return min(100, round(raw / max_possible * 100))
|
|
1480
|
+
logger.warning(
|
|
1481
|
+
"BM25 scored > 0 but max_possible IDF is 0; "
|
|
1482
|
+
"query_tokens=%s, doc_tokens_len=%d",
|
|
1483
|
+
query_tokens,
|
|
1484
|
+
len(doc_tokens),
|
|
1485
|
+
)
|
|
1486
|
+
return 100
|
|
1487
|
+
|
|
1488
|
+
# Tier-3 fallback: token-level SequenceMatcher for typos
|
|
1489
|
+
logger.debug(
|
|
1490
|
+
"BM25 returned 0 for query_tokens=%s; "
|
|
1491
|
+
"falling back to SequenceMatcher typo scoring over %d unique tokens",
|
|
1492
|
+
query_tokens,
|
|
1493
|
+
len(set(doc_tokens)),
|
|
1494
|
+
)
|
|
1495
|
+
best = 0
|
|
1496
|
+
for qt in query_tokens:
|
|
1497
|
+
for dt in set(doc_tokens):
|
|
1498
|
+
best = max(best, calculate_ratio(qt, dt))
|
|
1499
|
+
return best if best >= 70 else 0
|
|
1500
|
+
|
|
1501
|
+
@staticmethod
|
|
1502
|
+
def _collect_string_leaves(
|
|
1503
|
+
data: dict[str, Any] | list[Any] | Any, out: list[str]
|
|
1504
|
+
) -> None:
|
|
1505
|
+
"""Recursively collect all string representations from nested data."""
|
|
1440
1506
|
if isinstance(data, dict):
|
|
1441
1507
|
for key, value in data.items():
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1508
|
+
out.append(str(key))
|
|
1509
|
+
SmartSearchTools._collect_string_leaves(value, out)
|
|
1510
|
+
elif isinstance(data, list):
|
|
1511
|
+
for item in data:
|
|
1512
|
+
SmartSearchTools._collect_string_leaves(item, out)
|
|
1513
|
+
elif isinstance(data, str):
|
|
1514
|
+
out.append(data)
|
|
1515
|
+
elif data is not None:
|
|
1516
|
+
out.append(str(data))
|
|
1448
1517
|
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1518
|
+
@staticmethod
|
|
1519
|
+
def _search_in_dict_exact(
|
|
1520
|
+
data: dict[str, Any] | list[Any] | Any,
|
|
1521
|
+
query: str,
|
|
1522
|
+
) -> int:
|
|
1523
|
+
"""Exact substring search in nested structures (returns 100 or 0)."""
|
|
1524
|
+
if isinstance(data, dict):
|
|
1525
|
+
for key, value in data.items():
|
|
1526
|
+
if query in str(key).lower():
|
|
1527
|
+
return 100
|
|
1528
|
+
if SmartSearchTools._search_in_dict_exact(value, query) >= 100:
|
|
1452
1529
|
return 100
|
|
1453
|
-
|
|
1454
1530
|
elif isinstance(data, list):
|
|
1455
1531
|
for item in data:
|
|
1456
|
-
|
|
1457
|
-
max_score = max(max_score, item_score)
|
|
1458
|
-
if exact_match and max_score >= 100:
|
|
1532
|
+
if SmartSearchTools._search_in_dict_exact(item, query) >= 100:
|
|
1459
1533
|
return 100
|
|
1460
|
-
|
|
1461
1534
|
elif isinstance(data, str):
|
|
1462
|
-
if
|
|
1463
|
-
|
|
1464
|
-
return 100
|
|
1465
|
-
else:
|
|
1466
|
-
max_score = max(max_score, calculate_partial_ratio(query, data.lower()))
|
|
1467
|
-
|
|
1535
|
+
if query in data.lower():
|
|
1536
|
+
return 100
|
|
1468
1537
|
elif data is not None:
|
|
1469
|
-
if
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
else:
|
|
1473
|
-
max_score = max(
|
|
1474
|
-
max_score,
|
|
1475
|
-
calculate_partial_ratio(query, str(data).lower()),
|
|
1476
|
-
)
|
|
1477
|
-
|
|
1478
|
-
return max_score
|
|
1538
|
+
if query in str(data).lower():
|
|
1539
|
+
return 100
|
|
1540
|
+
return 0
|
|
1479
1541
|
|
|
1480
1542
|
|
|
1481
1543
|
def create_smart_search_tools(
|
|
@@ -1,20 +1,139 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Fuzzy entity search utilities for Home Assistant MCP server.
|
|
3
3
|
|
|
4
|
-
This module
|
|
5
|
-
|
|
4
|
+
This module provides two search strategies:
|
|
5
|
+
- BM25 keyword search (primary fuzzy path): tokenized scoring with IDF term weighting,
|
|
6
|
+
effective for multi-word queries and short entity-name corpora.
|
|
7
|
+
- SequenceMatcher (tier-3 fallback): character-level similarity for single-token typo
|
|
8
|
+
correction when BM25 returns nothing.
|
|
9
|
+
|
|
10
|
+
See issue #851 for background on the BM25 migration.
|
|
6
11
|
"""
|
|
7
12
|
|
|
8
13
|
import logging
|
|
14
|
+
import math
|
|
15
|
+
import re
|
|
9
16
|
from collections.abc import Iterable
|
|
10
17
|
from difflib import SequenceMatcher
|
|
11
18
|
from typing import Any
|
|
12
19
|
|
|
13
20
|
logger = logging.getLogger(__name__)
|
|
14
21
|
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Tokenizer for HA entity IDs and friendly names
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
_SPLIT_RE = re.compile(r"[._\-\s]+")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def tokenize(text: str) -> list[str]:
|
|
30
|
+
"""Split text on `.`, `_`, `-`, and whitespace, lowercase, drop empties."""
|
|
31
|
+
return [t for t in _SPLIT_RE.split(text.lower()) if t]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# BM25 scorer – lightweight, zero-dependency
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class BM25Scorer:
|
|
40
|
+
"""BM25 (Okapi) scorer tuned for short HA entity-name documents.
|
|
41
|
+
|
|
42
|
+
Parameters are set conservatively for corpora of 2-5 token documents:
|
|
43
|
+
k1=1.2 - moderate term-frequency saturation
|
|
44
|
+
b=0.5 - reduced length-normalization (entity names are uniformly short)
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
def __init__(self, k1: float = 1.2, b: float = 0.5) -> None:
|
|
48
|
+
self.k1 = k1
|
|
49
|
+
self.b = b
|
|
50
|
+
# Populated by fit()
|
|
51
|
+
self._idf: dict[str, float] = {}
|
|
52
|
+
self._doc_tokens: list[list[str]] = []
|
|
53
|
+
self._doc_lens: list[int] = []
|
|
54
|
+
self._avgdl: float = 0.0
|
|
55
|
+
|
|
56
|
+
# -- corpus building ----------------------------------------------------
|
|
57
|
+
|
|
58
|
+
def fit(self, corpus: list[list[str]]) -> None:
|
|
59
|
+
"""Build IDF table from a pre-tokenized corpus."""
|
|
60
|
+
self._doc_tokens = corpus
|
|
61
|
+
n = len(corpus)
|
|
62
|
+
if n == 0:
|
|
63
|
+
return
|
|
64
|
+
|
|
65
|
+
self._doc_lens = [len(doc) for doc in corpus]
|
|
66
|
+
self._avgdl = sum(self._doc_lens) / n
|
|
67
|
+
# Guard against all-empty corpora: avoids nan from 0/0 in length normalization
|
|
68
|
+
if self._avgdl == 0.0:
|
|
69
|
+
self._avgdl = 1.0
|
|
70
|
+
|
|
71
|
+
# document frequency per token
|
|
72
|
+
df: dict[str, int] = {}
|
|
73
|
+
for doc in corpus:
|
|
74
|
+
seen: set[str] = set()
|
|
75
|
+
for token in doc:
|
|
76
|
+
if token not in seen:
|
|
77
|
+
df[token] = df.get(token, 0) + 1
|
|
78
|
+
seen.add(token)
|
|
79
|
+
|
|
80
|
+
# IDF with smoothing (Robertson variant)
|
|
81
|
+
self._idf = {
|
|
82
|
+
token: math.log((n - freq + 0.5) / (freq + 0.5) + 1.0)
|
|
83
|
+
for token, freq in df.items()
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
# -- scoring ------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def score(self, query_tokens: list[str], doc_index: int) -> float:
|
|
89
|
+
"""Return the BM25 score for *query_tokens* against document at *doc_index*."""
|
|
90
|
+
doc = self._doc_tokens[doc_index]
|
|
91
|
+
dl = self._doc_lens[doc_index]
|
|
92
|
+
|
|
93
|
+
# term frequency in this document
|
|
94
|
+
tf: dict[str, int] = {}
|
|
95
|
+
for t in doc:
|
|
96
|
+
tf[t] = tf.get(t, 0) + 1
|
|
97
|
+
|
|
98
|
+
total = 0.0
|
|
99
|
+
for qt in query_tokens:
|
|
100
|
+
idf = self._idf.get(qt, 0.0)
|
|
101
|
+
f = tf.get(qt, 0)
|
|
102
|
+
if f == 0:
|
|
103
|
+
continue
|
|
104
|
+
numer = f * (self.k1 + 1)
|
|
105
|
+
denom = f + self.k1 * (1 - self.b + self.b * dl / self._avgdl)
|
|
106
|
+
total += idf * numer / denom
|
|
107
|
+
return total
|
|
108
|
+
|
|
109
|
+
def score_all(self, query_tokens: list[str]) -> list[float]:
|
|
110
|
+
"""Return BM25 scores for every document in the fitted corpus."""
|
|
111
|
+
return [self.score(query_tokens, i) for i in range(len(self._doc_tokens))]
|
|
112
|
+
|
|
113
|
+
def max_possible_score(self, query_tokens: list[str]) -> float:
|
|
114
|
+
"""Return the theoretical maximum BM25 score for *query_tokens*.
|
|
115
|
+
|
|
116
|
+
Used for absolute normalization: dividing a raw score by this produces
|
|
117
|
+
a 0-1 ratio representing how close a document is to a perfect match.
|
|
118
|
+
|
|
119
|
+
Query tokens absent from the corpus contribute the corpus's maximum
|
|
120
|
+
IDF as a penalty — this prevents partial matches from scoring as
|
|
121
|
+
perfect matches when the other query tokens simply do not exist in
|
|
122
|
+
the corpus.
|
|
123
|
+
"""
|
|
124
|
+
if not self._idf:
|
|
125
|
+
return 0.0
|
|
126
|
+
max_idf = max(self._idf.values())
|
|
127
|
+
return sum(self._idf.get(t, max_idf) for t in query_tokens)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
# FuzzyEntitySearcher – now BM25-primary with SequenceMatcher fallback
|
|
132
|
+
# ---------------------------------------------------------------------------
|
|
133
|
+
|
|
15
134
|
|
|
16
135
|
class FuzzyEntitySearcher:
|
|
17
|
-
"""
|
|
136
|
+
"""Entity search with BM25 keyword scoring and SequenceMatcher fallback."""
|
|
18
137
|
|
|
19
138
|
def __init__(self, threshold: int = 60):
|
|
20
139
|
"""Initialize with fuzzy matching threshold."""
|
|
@@ -24,14 +143,13 @@ class FuzzyEntitySearcher:
|
|
|
24
143
|
def search_entities(
|
|
25
144
|
self, entities: list[dict[str, Any]], query: str, limit: int = 10, offset: int = 0
|
|
26
145
|
) -> tuple[list[dict[str, Any]], int]:
|
|
27
|
-
"""
|
|
28
|
-
Search entities with fuzzy matching and intelligent scoring.
|
|
146
|
+
"""Search entities using BM25 scoring with SequenceMatcher typo fallback.
|
|
29
147
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
148
|
+
Strategy:
|
|
149
|
+
1. Tokenize every entity (entity_id + friendly_name) into a BM25 corpus.
|
|
150
|
+
2. Score all documents with BM25. Keep results above a positive threshold.
|
|
151
|
+
3. If BM25 returns nothing, fall back to token-level SequenceMatcher on
|
|
152
|
+
query tokens vs document tokens (catches single-character typos).
|
|
35
153
|
|
|
36
154
|
Returns:
|
|
37
155
|
Tuple of (paginated results list, total match count)
|
|
@@ -39,44 +157,110 @@ class FuzzyEntitySearcher:
|
|
|
39
157
|
if not query or not entities:
|
|
40
158
|
return [], 0
|
|
41
159
|
|
|
42
|
-
matches = []
|
|
43
160
|
query_lower = query.lower().strip()
|
|
161
|
+
query_tokens = tokenize(query_lower)
|
|
162
|
+
if not query_tokens:
|
|
163
|
+
return [], 0
|
|
164
|
+
|
|
165
|
+
# Build per-entity document: tokens from entity_id + friendly_name
|
|
166
|
+
docs: list[list[str]] = []
|
|
167
|
+
meta: list[tuple[str, str, str, dict[str, Any], str]] = [] # eid, name, domain, attrs, state
|
|
44
168
|
|
|
45
169
|
for entity in entities:
|
|
46
170
|
entity_id = entity.get("entity_id", "")
|
|
47
171
|
attributes = entity.get("attributes", {})
|
|
48
172
|
friendly_name = attributes.get("friendly_name", entity_id)
|
|
49
173
|
domain = entity_id.split(".")[0] if "." in entity_id else ""
|
|
174
|
+
state = entity.get("state", "unknown")
|
|
175
|
+
|
|
176
|
+
tokens = tokenize(entity_id) + tokenize(friendly_name)
|
|
177
|
+
docs.append(tokens)
|
|
178
|
+
meta.append((entity_id, friendly_name, domain, attributes, state))
|
|
179
|
+
|
|
180
|
+
# Fit BM25
|
|
181
|
+
scorer = BM25Scorer()
|
|
182
|
+
scorer.fit(docs)
|
|
183
|
+
raw_scores = scorer.score_all(query_tokens)
|
|
184
|
+
|
|
185
|
+
# Normalise against theoretical max (sum of IDFs) to produce absolute
|
|
186
|
+
# scores in the 0-100 range. Empirical-max normalization would always
|
|
187
|
+
# inflate the best match to 100 regardless of actual relevance, which
|
|
188
|
+
# defeats the purpose of a threshold-based quality gate.
|
|
189
|
+
theoretical_max = scorer.max_possible_score(query_tokens)
|
|
190
|
+
matches: list[dict[str, Any]] = []
|
|
191
|
+
|
|
192
|
+
if theoretical_max > 0:
|
|
193
|
+
for i, raw in enumerate(raw_scores):
|
|
194
|
+
if raw <= 0:
|
|
195
|
+
continue
|
|
196
|
+
score = min(100, round(raw / theoretical_max * 100))
|
|
197
|
+
if score < self.threshold:
|
|
198
|
+
continue
|
|
199
|
+
eid, fname, domain, attrs, state = meta[i]
|
|
200
|
+
matches.append({
|
|
201
|
+
"entity_id": eid,
|
|
202
|
+
"friendly_name": fname,
|
|
203
|
+
"domain": domain,
|
|
204
|
+
"state": state,
|
|
205
|
+
"attributes": attrs,
|
|
206
|
+
"score": score,
|
|
207
|
+
"match_type": self._get_match_type(eid, fname, domain, query_lower),
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
# Tier-3 fallback: token-level SequenceMatcher only if BM25 scored
|
|
211
|
+
# every document at zero. Firing the fallback when BM25 found valid
|
|
212
|
+
# partial matches (just below threshold) would allow a character-level
|
|
213
|
+
# match on the same token to inflate the score to 100, re-introducing
|
|
214
|
+
# exactly the noise floor the new absolute normalization is fixing.
|
|
215
|
+
bm25_found_any = any(raw > 0 for raw in raw_scores)
|
|
216
|
+
if not matches and not bm25_found_any:
|
|
217
|
+
matches = self._typo_fallback(query_tokens, query_lower, docs, meta)
|
|
50
218
|
|
|
51
|
-
# Calculate comprehensive score
|
|
52
|
-
score = self._calculate_entity_score(
|
|
53
|
-
entity_id, friendly_name, domain, query_lower
|
|
54
|
-
)
|
|
55
|
-
|
|
56
|
-
if score >= self.threshold:
|
|
57
|
-
matches.append(
|
|
58
|
-
{
|
|
59
|
-
"entity_id": entity_id,
|
|
60
|
-
"friendly_name": friendly_name,
|
|
61
|
-
"domain": domain,
|
|
62
|
-
"state": entity.get("state", "unknown"),
|
|
63
|
-
"attributes": attributes,
|
|
64
|
-
"score": score,
|
|
65
|
-
"match_type": self._get_match_type(
|
|
66
|
-
entity_id, friendly_name, domain, query_lower
|
|
67
|
-
),
|
|
68
|
-
}
|
|
69
|
-
)
|
|
70
|
-
|
|
71
|
-
# Sort by score descending
|
|
72
219
|
matches.sort(key=lambda x: x["score"], reverse=True)
|
|
73
220
|
total_matches = len(matches)
|
|
74
221
|
return matches[offset:offset + limit], total_matches
|
|
75
222
|
|
|
223
|
+
# -- private helpers -----------------------------------------------------
|
|
224
|
+
|
|
225
|
+
def _typo_fallback(
|
|
226
|
+
self,
|
|
227
|
+
query_tokens: list[str],
|
|
228
|
+
query_lower: str,
|
|
229
|
+
docs: list[list[str]],
|
|
230
|
+
meta: list[tuple[str, str, str, dict[str, Any], str]],
|
|
231
|
+
) -> list[dict[str, Any]]:
|
|
232
|
+
"""Token-level SequenceMatcher fallback for typo correction."""
|
|
233
|
+
results: list[dict[str, Any]] = []
|
|
234
|
+
for i, doc_tokens in enumerate(docs):
|
|
235
|
+
best_token_score = 0
|
|
236
|
+
for qt in query_tokens:
|
|
237
|
+
for dt in doc_tokens:
|
|
238
|
+
ratio = calculate_ratio(qt, dt)
|
|
239
|
+
best_token_score = max(best_token_score, ratio)
|
|
240
|
+
|
|
241
|
+
if best_token_score >= 75: # stricter threshold for typo fallback
|
|
242
|
+
eid, fname, domain, attrs, state = meta[i]
|
|
243
|
+
results.append({
|
|
244
|
+
"entity_id": eid,
|
|
245
|
+
"friendly_name": fname,
|
|
246
|
+
"domain": domain,
|
|
247
|
+
"state": state,
|
|
248
|
+
"attributes": attrs,
|
|
249
|
+
"score": best_token_score,
|
|
250
|
+
"match_type": "typo_fallback",
|
|
251
|
+
})
|
|
252
|
+
return results
|
|
253
|
+
|
|
76
254
|
def _calculate_entity_score(
|
|
77
255
|
self, entity_id: str, friendly_name: str, domain: str, query: str
|
|
78
256
|
) -> int:
|
|
79
|
-
"""Calculate comprehensive fuzzy score for an entity.
|
|
257
|
+
"""Calculate a comprehensive fuzzy score for an entity name/domain.
|
|
258
|
+
|
|
259
|
+
Actively used by ``ha_deep_search`` name scoring (automation, script,
|
|
260
|
+
helper phases) to produce a score comparable to the legacy additive
|
|
261
|
+
output those paths already rely on. Do not remove without migrating
|
|
262
|
+
the deep-search callers to a BM25-based scheme.
|
|
263
|
+
"""
|
|
80
264
|
score = 0
|
|
81
265
|
|
|
82
266
|
# Exact matches get highest scores
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/AGENTS.md
RENAMED
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/CLAUDE.md
RENAMED
|
File without changes
|
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/LICENSE
RENAMED
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/best_practice_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_automations.py
RENAMED
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_dashboards.py
RENAMED
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_entry_flow.py
RENAMED
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_helpers.py
RENAMED
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_scripts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_voice_assistant.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/transforms/categorized_search.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|