ha-mcp-dev 7.2.0.dev350__tar.gz → 7.2.0.dev351__tar.gz

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (100)
  1. {ha_mcp_dev-7.2.0.dev350/src/ha_mcp_dev.egg-info → ha_mcp_dev-7.2.0.dev351}/PKG-INFO +1 -1
  2. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/pyproject.toml +1 -1
  3. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/smart_search.py +98 -36
  4. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/fuzzy_search.py +217 -33
  5. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351/src/ha_mcp_dev.egg-info}/PKG-INFO +1 -1
  6. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/LICENSE +0 -0
  7. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/MANIFEST.in +0 -0
  8. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/README.md +0 -0
  9. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/setup.cfg +0 -0
  10. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/__init__.py +0 -0
  11. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/__main__.py +0 -0
  12. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/_pypi_marker +0 -0
  13. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/auth/__init__.py +0 -0
  14. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/auth/consent_form.py +0 -0
  15. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/auth/provider.py +0 -0
  16. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/__init__.py +0 -0
  17. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/rest_client.py +0 -0
  18. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/websocket_client.py +0 -0
  19. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/client/websocket_listener.py +0 -0
  20. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/config.py +0 -0
  21. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/errors.py +0 -0
  22. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/py.typed +0 -0
  23. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.claude/settings.json +0 -0
  24. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.claude-plugin/marketplace.json +0 -0
  25. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.claude-plugin/plugin.json +0 -0
  26. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/.github/ISSUE_TEMPLATE/skill-rca.md +0 -0
  27. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/AGENTS.md +0 -0
  28. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/CLAUDE.md +0 -0
  29. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/CONTRIBUTING.md +0 -0
  30. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/LICENSE +0 -0
  31. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/README.md +0 -0
  32. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/SKILL.md +0 -0
  33. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/evals/evals.json +0 -0
  34. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/automation-patterns.md +0 -0
  35. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/dashboard-cards.md +0 -0
  36. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/dashboard-guide.md +0 -0
  37. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/device-control.md +0 -0
  38. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/domain-docs.md +0 -0
  39. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/examples.yaml +0 -0
  40. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/helper-selection.md +0 -0
  41. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/safe-refactoring.md +0 -0
  42. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/resources/skills-vendor/skills/home-assistant-best-practices/references/template-guidelines.md +0 -0
  43. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/server.py +0 -0
  44. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/smoke_test.py +0 -0
  45. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/__init__.py +0 -0
  46. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/backup.py +0 -0
  47. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/best_practice_checker.py +0 -0
  48. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/device_control.py +0 -0
  49. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/enhanced.py +0 -0
  50. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/helpers.py +0 -0
  51. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/registry.py +0 -0
  52. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_addons.py +0 -0
  53. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_areas.py +0 -0
  54. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_blueprints.py +0 -0
  55. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_bug_report.py +0 -0
  56. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_calendar.py +0 -0
  57. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_camera.py +0 -0
  58. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_categories.py +0 -0
  59. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_automations.py +0 -0
  60. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_dashboards.py +0 -0
  61. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_entry_flow.py +0 -0
  62. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_helpers.py +0 -0
  63. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_config_scripts.py +0 -0
  64. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_entities.py +0 -0
  65. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_filesystem.py +0 -0
  66. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_groups.py +0 -0
  67. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_hacs.py +0 -0
  68. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_history.py +0 -0
  69. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_integrations.py +0 -0
  70. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_labels.py +0 -0
  71. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_mcp_component.py +0 -0
  72. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_registry.py +0 -0
  73. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_resources.py +0 -0
  74. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_search.py +0 -0
  75. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_service.py +0 -0
  76. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_services.py +0 -0
  77. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_system.py +0 -0
  78. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_todo.py +0 -0
  79. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_traces.py +0 -0
  80. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_updates.py +0 -0
  81. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_utility.py +0 -0
  82. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_voice_assistant.py +0 -0
  83. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_yaml_config.py +0 -0
  84. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/tools_zones.py +0 -0
  85. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/tools/util_helpers.py +0 -0
  86. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/transforms/__init__.py +0 -0
  87. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/transforms/categorized_search.py +0 -0
  88. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/__init__.py +0 -0
  89. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/domain_handlers.py +0 -0
  90. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/operation_manager.py +0 -0
  91. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/python_sandbox.py +0 -0
  92. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp/utils/usage_logger.py +0 -0
  93. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/SOURCES.txt +0 -0
  94. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/dependency_links.txt +0 -0
  95. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/entry_points.txt +0 -0
  96. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/requires.txt +0 -0
  97. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/src/ha_mcp_dev.egg-info/top_level.txt +0 -0
  98. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/tests/__init__.py +0 -0
  99. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/tests/test_constants.py +0 -0
  100. {ha_mcp_dev-7.2.0.dev350 → ha_mcp_dev-7.2.0.dev351}/tests/test_env_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ha-mcp-dev
3
- Version: 7.2.0.dev350
3
+ Version: 7.2.0.dev351
4
4
  Summary: Home Assistant MCP Server - Complete control of Home Assistant through MCP
5
5
  Author-email: Julien <github@qc-h.net>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ha-mcp-dev"
7
- version = "7.2.0.dev350"
7
+ version = "7.2.0.dev351"
8
8
  description = "Home Assistant MCP Server - Complete control of Home Assistant through MCP"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.13,<3.14"
@@ -11,7 +11,13 @@ from typing import Any
11
11
 
12
12
  from ..client.rest_client import HomeAssistantClient
13
13
  from ..config import get_global_settings
14
- from ..utils.fuzzy_search import calculate_partial_ratio, create_fuzzy_searcher
14
+ from ..utils.fuzzy_search import (
15
+ BM25Scorer,
16
+ calculate_partial_ratio,
17
+ calculate_ratio,
18
+ create_fuzzy_searcher,
19
+ tokenize,
20
+ )
15
21
  from .helpers import exception_to_structured_error
16
22
 
17
23
  logger = logging.getLogger(__name__)
@@ -1429,53 +1435,109 @@ class SmartSearchTools:
1429
1435
  query: str,
1430
1436
  exact_match: bool = False,
1431
1437
  ) -> int:
1432
- """
1433
- Recursively search for query string in nested dictionary/list structures.
1438
+ """Search for query in nested dictionary/list structures.
1434
1439
 
1435
1440
  When exact_match is True, uses substring matching (returns 100 if found, 0 if not).
1436
- When exact_match is False, uses fuzzy matching with partial ratio scoring.
1441
+ When exact_match is False, collects all string leaves, tokenizes them into a
1442
+ single BM25 document, and scores against the query tokens. Falls back to
1443
+ token-level SequenceMatcher if BM25 returns 0 (typo correction).
1437
1444
  """
1438
- max_score = 0
1439
-
1445
+ if exact_match:
1446
+ return self._search_in_dict_exact(data, query)
1447
+
1448
+ # Fuzzy path: collect all string leaves, build a single tokenised document
1449
+ leaves: list[str] = []
1450
+ self._collect_string_leaves(data, leaves)
1451
+ if not leaves:
1452
+ return 0
1453
+
1454
+ query_tokens = tokenize(query)
1455
+ if not query_tokens:
1456
+ return 0
1457
+
1458
+ # Build a single flat token list from all leaves
1459
+ doc_tokens: list[str] = []
1460
+ for leaf in leaves:
1461
+ doc_tokens.extend(tokenize(leaf))
1462
+
1463
+ if not doc_tokens:
1464
+ return 0
1465
+
1466
+ # Use BM25 with a 1-document corpus (the config dict as a single doc)
1467
+ scorer = BM25Scorer()
1468
+ scorer.fit([doc_tokens])
1469
+ raw = scorer.score(query_tokens, 0)
1470
+
1471
+ if raw > 0:
1472
+ # Normalise against the theoretical max (sum of IDF per query
1473
+ # token). With a 1-document corpus every token's IDF is identical
1474
+ # (~0.288 with smoothing), so the ratio effectively measures how
1475
+ # many query tokens the config contains. Cap at 100 for the edge
1476
+ # case where high TF pushes raw above the sum-of-IDFs baseline.
1477
+ max_possible = scorer.max_possible_score(query_tokens)
1478
+ if max_possible > 0:
1479
+ return min(100, round(raw / max_possible * 100))
1480
+ logger.warning(
1481
+ "BM25 scored > 0 but max_possible IDF is 0; "
1482
+ "query_tokens=%s, doc_tokens_len=%d",
1483
+ query_tokens,
1484
+ len(doc_tokens),
1485
+ )
1486
+ return 100
1487
+
1488
+ # Tier-3 fallback: token-level SequenceMatcher for typos
1489
+ logger.debug(
1490
+ "BM25 returned 0 for query_tokens=%s; "
1491
+ "falling back to SequenceMatcher typo scoring over %d unique tokens",
1492
+ query_tokens,
1493
+ len(set(doc_tokens)),
1494
+ )
1495
+ best = 0
1496
+ for qt in query_tokens:
1497
+ for dt in set(doc_tokens):
1498
+ best = max(best, calculate_ratio(qt, dt))
1499
+ return best if best >= 70 else 0
1500
+
1501
+ @staticmethod
1502
+ def _collect_string_leaves(
1503
+ data: dict[str, Any] | list[Any] | Any, out: list[str]
1504
+ ) -> None:
1505
+ """Recursively collect all string representations from nested data."""
1440
1506
  if isinstance(data, dict):
1441
1507
  for key, value in data.items():
1442
- if exact_match:
1443
- if query in str(key).lower():
1444
- return 100
1445
- else:
1446
- key_score = calculate_partial_ratio(query, str(key).lower())
1447
- max_score = max(max_score, key_score)
1508
+ out.append(str(key))
1509
+ SmartSearchTools._collect_string_leaves(value, out)
1510
+ elif isinstance(data, list):
1511
+ for item in data:
1512
+ SmartSearchTools._collect_string_leaves(item, out)
1513
+ elif isinstance(data, str):
1514
+ out.append(data)
1515
+ elif data is not None:
1516
+ out.append(str(data))
1448
1517
 
1449
- value_score = self._search_in_dict(value, query, exact_match)
1450
- max_score = max(max_score, value_score)
1451
- if exact_match and max_score >= 100:
1518
+ @staticmethod
1519
+ def _search_in_dict_exact(
1520
+ data: dict[str, Any] | list[Any] | Any,
1521
+ query: str,
1522
+ ) -> int:
1523
+ """Exact substring search in nested structures (returns 100 or 0)."""
1524
+ if isinstance(data, dict):
1525
+ for key, value in data.items():
1526
+ if query in str(key).lower():
1527
+ return 100
1528
+ if SmartSearchTools._search_in_dict_exact(value, query) >= 100:
1452
1529
  return 100
1453
-
1454
1530
  elif isinstance(data, list):
1455
1531
  for item in data:
1456
- item_score = self._search_in_dict(item, query, exact_match)
1457
- max_score = max(max_score, item_score)
1458
- if exact_match and max_score >= 100:
1532
+ if SmartSearchTools._search_in_dict_exact(item, query) >= 100:
1459
1533
  return 100
1460
-
1461
1534
  elif isinstance(data, str):
1462
- if exact_match:
1463
- if query in data.lower():
1464
- return 100
1465
- else:
1466
- max_score = max(max_score, calculate_partial_ratio(query, data.lower()))
1467
-
1535
+ if query in data.lower():
1536
+ return 100
1468
1537
  elif data is not None:
1469
- if exact_match:
1470
- if query in str(data).lower():
1471
- return 100
1472
- else:
1473
- max_score = max(
1474
- max_score,
1475
- calculate_partial_ratio(query, str(data).lower()),
1476
- )
1477
-
1478
- return max_score
1538
+ if query in str(data).lower():
1539
+ return 100
1540
+ return 0
1479
1541
 
1480
1542
 
1481
1543
  def create_smart_search_tools(
@@ -1,20 +1,139 @@
1
1
  """
2
2
  Fuzzy entity search utilities for Home Assistant MCP server.
3
3
 
4
- This module uses Python's built-in difflib for string similarity calculations,
5
- eliminating the need for external dependencies like textdistance and numpy.
4
+ This module provides two search strategies:
5
+ - BM25 keyword search (primary fuzzy path): tokenized scoring with IDF term weighting,
6
+ effective for multi-word queries and short entity-name corpora.
7
+ - SequenceMatcher (tier-3 fallback): character-level similarity for single-token typo
8
+ correction when BM25 returns nothing.
9
+
10
+ See issue #851 for background on the BM25 migration.
6
11
  """
7
12
 
8
13
  import logging
14
+ import math
15
+ import re
9
16
  from collections.abc import Iterable
10
17
  from difflib import SequenceMatcher
11
18
  from typing import Any
12
19
 
13
20
  logger = logging.getLogger(__name__)
14
21
 
22
+ # ---------------------------------------------------------------------------
23
+ # Tokenizer for HA entity IDs and friendly names
24
+ # ---------------------------------------------------------------------------
25
+
26
+ _SPLIT_RE = re.compile(r"[._\-\s]+")
27
+
28
+
29
+ def tokenize(text: str) -> list[str]:
30
+ """Split text on `.`, `_`, `-`, and whitespace, lowercase, drop empties."""
31
+ return [t for t in _SPLIT_RE.split(text.lower()) if t]
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # BM25 scorer – lightweight, zero-dependency
36
+ # ---------------------------------------------------------------------------
37
+
38
+
39
+ class BM25Scorer:
40
+ """BM25 (Okapi) scorer tuned for short HA entity-name documents.
41
+
42
+ Parameters are set conservatively for corpora of 2-5 token documents:
43
+ k1=1.2 - moderate term-frequency saturation
44
+ b=0.5 - reduced length-normalization (entity names are uniformly short)
45
+ """
46
+
47
+ def __init__(self, k1: float = 1.2, b: float = 0.5) -> None:
48
+ self.k1 = k1
49
+ self.b = b
50
+ # Populated by fit()
51
+ self._idf: dict[str, float] = {}
52
+ self._doc_tokens: list[list[str]] = []
53
+ self._doc_lens: list[int] = []
54
+ self._avgdl: float = 0.0
55
+
56
+ # -- corpus building ----------------------------------------------------
57
+
58
+ def fit(self, corpus: list[list[str]]) -> None:
59
+ """Build IDF table from a pre-tokenized corpus."""
60
+ self._doc_tokens = corpus
61
+ n = len(corpus)
62
+ if n == 0:
63
+ return
64
+
65
+ self._doc_lens = [len(doc) for doc in corpus]
66
+ self._avgdl = sum(self._doc_lens) / n
67
+ # Guard against all-empty corpora: avoids nan from 0/0 in length normalization
68
+ if self._avgdl == 0.0:
69
+ self._avgdl = 1.0
70
+
71
+ # document frequency per token
72
+ df: dict[str, int] = {}
73
+ for doc in corpus:
74
+ seen: set[str] = set()
75
+ for token in doc:
76
+ if token not in seen:
77
+ df[token] = df.get(token, 0) + 1
78
+ seen.add(token)
79
+
80
+ # IDF with smoothing (Robertson variant)
81
+ self._idf = {
82
+ token: math.log((n - freq + 0.5) / (freq + 0.5) + 1.0)
83
+ for token, freq in df.items()
84
+ }
85
+
86
+ # -- scoring ------------------------------------------------------------
87
+
88
+ def score(self, query_tokens: list[str], doc_index: int) -> float:
89
+ """Return the BM25 score for *query_tokens* against document at *doc_index*."""
90
+ doc = self._doc_tokens[doc_index]
91
+ dl = self._doc_lens[doc_index]
92
+
93
+ # term frequency in this document
94
+ tf: dict[str, int] = {}
95
+ for t in doc:
96
+ tf[t] = tf.get(t, 0) + 1
97
+
98
+ total = 0.0
99
+ for qt in query_tokens:
100
+ idf = self._idf.get(qt, 0.0)
101
+ f = tf.get(qt, 0)
102
+ if f == 0:
103
+ continue
104
+ numer = f * (self.k1 + 1)
105
+ denom = f + self.k1 * (1 - self.b + self.b * dl / self._avgdl)
106
+ total += idf * numer / denom
107
+ return total
108
+
109
+ def score_all(self, query_tokens: list[str]) -> list[float]:
110
+ """Return BM25 scores for every document in the fitted corpus."""
111
+ return [self.score(query_tokens, i) for i in range(len(self._doc_tokens))]
112
+
113
+ def max_possible_score(self, query_tokens: list[str]) -> float:
114
+ """Return the theoretical maximum BM25 score for *query_tokens*.
115
+
116
+ Used for absolute normalization: dividing a raw score by this produces
117
+ a 0-1 ratio representing how close a document is to a perfect match.
118
+
119
+ Query tokens absent from the corpus contribute the corpus's maximum
120
+ IDF as a penalty — this prevents partial matches from scoring as
121
+ perfect matches when the other query tokens simply do not exist in
122
+ the corpus.
123
+ """
124
+ if not self._idf:
125
+ return 0.0
126
+ max_idf = max(self._idf.values())
127
+ return sum(self._idf.get(t, max_idf) for t in query_tokens)
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # FuzzyEntitySearcher – now BM25-primary with SequenceMatcher fallback
132
+ # ---------------------------------------------------------------------------
133
+
15
134
 
16
135
  class FuzzyEntitySearcher:
17
- """Advanced fuzzy entity search with AI-optimized scoring."""
136
+ """Entity search with BM25 keyword scoring and SequenceMatcher fallback."""
18
137
 
19
138
  def __init__(self, threshold: int = 60):
20
139
  """Initialize with fuzzy matching threshold."""
@@ -24,14 +143,13 @@ class FuzzyEntitySearcher:
24
143
  def search_entities(
25
144
  self, entities: list[dict[str, Any]], query: str, limit: int = 10, offset: int = 0
26
145
  ) -> tuple[list[dict[str, Any]], int]:
27
- """
28
- Search entities with fuzzy matching and intelligent scoring.
146
+ """Search entities using BM25 scoring with SequenceMatcher typo fallback.
29
147
 
30
- Args:
31
- entities: List of Home Assistant entity states
32
- query: Search query (can be partial, with typos)
33
- limit: Maximum number of results
34
- offset: Number of results to skip for pagination
148
+ Strategy:
149
+ 1. Tokenize every entity (entity_id + friendly_name) into a BM25 corpus.
150
+ 2. Score all documents with BM25. Keep results above a positive threshold.
151
+ 3. If BM25 returns nothing, fall back to token-level SequenceMatcher on
152
+ query tokens vs document tokens (catches single-character typos).
35
153
 
36
154
  Returns:
37
155
  Tuple of (paginated results list, total match count)
@@ -39,44 +157,110 @@ class FuzzyEntitySearcher:
39
157
  if not query or not entities:
40
158
  return [], 0
41
159
 
42
- matches = []
43
160
  query_lower = query.lower().strip()
161
+ query_tokens = tokenize(query_lower)
162
+ if not query_tokens:
163
+ return [], 0
164
+
165
+ # Build per-entity document: tokens from entity_id + friendly_name
166
+ docs: list[list[str]] = []
167
+ meta: list[tuple[str, str, str, dict[str, Any], str]] = [] # eid, name, domain, attrs, state
44
168
 
45
169
  for entity in entities:
46
170
  entity_id = entity.get("entity_id", "")
47
171
  attributes = entity.get("attributes", {})
48
172
  friendly_name = attributes.get("friendly_name", entity_id)
49
173
  domain = entity_id.split(".")[0] if "." in entity_id else ""
174
+ state = entity.get("state", "unknown")
175
+
176
+ tokens = tokenize(entity_id) + tokenize(friendly_name)
177
+ docs.append(tokens)
178
+ meta.append((entity_id, friendly_name, domain, attributes, state))
179
+
180
+ # Fit BM25
181
+ scorer = BM25Scorer()
182
+ scorer.fit(docs)
183
+ raw_scores = scorer.score_all(query_tokens)
184
+
185
+ # Normalise against theoretical max (sum of IDFs) to produce absolute
186
+ # scores in the 0-100 range. Empirical-max normalization would always
187
+ # inflate the best match to 100 regardless of actual relevance, which
188
+ # defeats the purpose of a threshold-based quality gate.
189
+ theoretical_max = scorer.max_possible_score(query_tokens)
190
+ matches: list[dict[str, Any]] = []
191
+
192
+ if theoretical_max > 0:
193
+ for i, raw in enumerate(raw_scores):
194
+ if raw <= 0:
195
+ continue
196
+ score = min(100, round(raw / theoretical_max * 100))
197
+ if score < self.threshold:
198
+ continue
199
+ eid, fname, domain, attrs, state = meta[i]
200
+ matches.append({
201
+ "entity_id": eid,
202
+ "friendly_name": fname,
203
+ "domain": domain,
204
+ "state": state,
205
+ "attributes": attrs,
206
+ "score": score,
207
+ "match_type": self._get_match_type(eid, fname, domain, query_lower),
208
+ })
209
+
210
+ # Tier-3 fallback: token-level SequenceMatcher only if BM25 scored
211
+ # every document at zero. Firing the fallback when BM25 found valid
212
+ # partial matches (just below threshold) would allow a character-level
213
+ # match on the same token to inflate the score to 100, re-introducing
214
+ # exactly the noise floor the new absolute normalization is fixing.
215
+ bm25_found_any = any(raw > 0 for raw in raw_scores)
216
+ if not matches and not bm25_found_any:
217
+ matches = self._typo_fallback(query_tokens, query_lower, docs, meta)
50
218
 
51
- # Calculate comprehensive score
52
- score = self._calculate_entity_score(
53
- entity_id, friendly_name, domain, query_lower
54
- )
55
-
56
- if score >= self.threshold:
57
- matches.append(
58
- {
59
- "entity_id": entity_id,
60
- "friendly_name": friendly_name,
61
- "domain": domain,
62
- "state": entity.get("state", "unknown"),
63
- "attributes": attributes,
64
- "score": score,
65
- "match_type": self._get_match_type(
66
- entity_id, friendly_name, domain, query_lower
67
- ),
68
- }
69
- )
70
-
71
- # Sort by score descending
72
219
  matches.sort(key=lambda x: x["score"], reverse=True)
73
220
  total_matches = len(matches)
74
221
  return matches[offset:offset + limit], total_matches
75
222
 
223
+ # -- private helpers -----------------------------------------------------
224
+
225
+ def _typo_fallback(
226
+ self,
227
+ query_tokens: list[str],
228
+ query_lower: str,
229
+ docs: list[list[str]],
230
+ meta: list[tuple[str, str, str, dict[str, Any], str]],
231
+ ) -> list[dict[str, Any]]:
232
+ """Token-level SequenceMatcher fallback for typo correction."""
233
+ results: list[dict[str, Any]] = []
234
+ for i, doc_tokens in enumerate(docs):
235
+ best_token_score = 0
236
+ for qt in query_tokens:
237
+ for dt in doc_tokens:
238
+ ratio = calculate_ratio(qt, dt)
239
+ best_token_score = max(best_token_score, ratio)
240
+
241
+ if best_token_score >= 75: # stricter threshold for typo fallback
242
+ eid, fname, domain, attrs, state = meta[i]
243
+ results.append({
244
+ "entity_id": eid,
245
+ "friendly_name": fname,
246
+ "domain": domain,
247
+ "state": state,
248
+ "attributes": attrs,
249
+ "score": best_token_score,
250
+ "match_type": "typo_fallback",
251
+ })
252
+ return results
253
+
76
254
  def _calculate_entity_score(
77
255
  self, entity_id: str, friendly_name: str, domain: str, query: str
78
256
  ) -> int:
79
- """Calculate comprehensive fuzzy score for an entity."""
257
+ """Calculate a comprehensive fuzzy score for an entity name/domain.
258
+
259
+ Actively used by ``ha_deep_search`` name scoring (automation, script,
260
+ helper phases) to produce a score comparable to the legacy additive
261
+ output those paths already rely on. Do not remove without migrating
262
+ the deep-search callers to a BM25-based scheme.
263
+ """
80
264
  score = 0
81
265
 
82
266
  # Exact matches get highest scores
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ha-mcp-dev
3
- Version: 7.2.0.dev350
3
+ Version: 7.2.0.dev351
4
4
  Summary: Home Assistant MCP Server - Complete control of Home Assistant through MCP
5
5
  Author-email: Julien <github@qc-h.net>
6
6
  License: MIT