PyPI - microsoft-agents-hosting-dialogs - Versions diffs - 0.10.0.dev2__py3-none-any.whl - Mend

microsoft-agents-hosting-dialogs 0.10.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

microsoft_agents/hosting/dialogs/choices/choice_recognizer.py ADDED Viewed

@@ -0,0 +1,148 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from collections.abc import Iterable
+from recognizers_number import NumberModel, NumberRecognizer, OrdinalModel
+from recognizers_text import Culture
+from typing import cast
+from .models.choice import Choice
+from .find import Find
+from .models.find_choices_options import FindChoicesOptions
+from .models.found_choice import FoundChoice
+from .models.model_result import ModelResult
+class ChoiceRecognizers:
+    """Contains methods for matching user input against a list of choices."""
+    @staticmethod
+    def recognize_choices(
+        utterance: str,
+        choices: Iterable[str | Choice],
+        options: FindChoicesOptions | None = None,
+    ) -> list[ModelResult]:
+        """
+        Matches user input against a list of choices.
+        This is layered above the `Find.find_choices()` function, and adds logic to let the user specify
+        their choice by index (they can say "one" to pick `choice[0]`) or ordinal position
+        (they can say "the second one" to pick `choice[1]`).
+        The user's utterance is recognized in the following order:
+        - By name using `find_choices()`
+        - By 1-based ordinal position.
+        - By 1-based index position.
+        Parameters
+        -----------
+        utterance: The input. `None` is treated as an empty string.
+        choices: The list of choices. Plain strings are wrapped in `Choice`.
+        options: (Optional) Options to control the recognition strategy. When omitted, ordinal and
+        number recognition are both enabled and `Culture.English` is used as the locale.
+        Returns
+        --------
+        A list of found choices. Name matches are returned exactly as `Find.find_choices()` produced
+        them; ordinal and number matches are sorted by their position within the utterance.
+        Raises
+        -------
+        TypeError: Propagated from `Find.find_choices()` when the list of choices is empty.
+        """
+        if utterance is None:
+            utterance = ""
+        # Normalize list of choices
+        choices_list = [
+            Choice(value=choice) if isinstance(choice, str) else choice
+            for choice in choices
+        ]
+        # Try finding choices by text search first
+        # - We only want to use a single strategy for returning results to avoid issues where utterances
+        #   like the "the third one" or "the red one" or "the first division book" would be misrecognized
+        #   as a numerical index or ordinal as well.
+        locale = options.locale if (options and options.locale) else Culture.English
+        matched = Find.find_choices(utterance, choices_list, options)
+        if not matched:
+            matches = []
+            if not options or options.recognize_ordinals:
+                # Next try finding by ordinal
+                matches = ChoiceRecognizers._recognize_ordinal(utterance, locale)
+                for match in matches:
+                    ChoiceRecognizers._match_choice_by_index(
+                        choices_list, matched, match
+                    )
+            # Numbers are only tried when the ordinal pass recognized nothing at all.
+            if not matches and (not options or options.recognize_numbers):
+                # Then try by numerical index
+                matches = ChoiceRecognizers._recognize_number(utterance, locale)
+                for match in matches:
+                    ChoiceRecognizers._match_choice_by_index(
+                        choices_list, matched, match
+                    )
+            # Sort any found matches by their position within the utterance.
+            # - The results from find_choices() are already properly sorted so we just need this
+            #   for ordinal & numerical lookups.
+            matched = sorted(matched, key=lambda model_result: model_result.start)
+        return matched
+    @staticmethod
+    def _recognize_ordinal(utterance: str, culture: str) -> list[ModelResult]:
+        """Parse `utterance` with the ordinal model for `culture` and wrap each hit as a "choice" result."""
+        model: OrdinalModel = cast(
+            OrdinalModel, NumberRecognizer(culture).get_ordinal_model(culture)
+        )
+        return list(
+            map(ChoiceRecognizers._found_choice_constructor, model.parse(utterance))  # type: ignore[arg-type]
+        )
+    @staticmethod
+    def _match_choice_by_index(
+        choices: list[Choice], matched: list[ModelResult], match: ModelResult
+    ):
+        """
+        Append the choice addressed by a recognized 1-based number to `matched`.
+        Parameters
+        -----------
+        choices: The normalized list of choices being indexed.
+        matched: Output list; a new "choice" result is appended when the index is in range.
+        match: A recognized number/ordinal whose `resolution.value` holds the 1-based position.
+        Returns
+        --------
+        None. Values that are not whole numbers, or that fall outside `choices`, are ignored.
+        """
+        try:
+            # Convert the user-facing 1-based position to a 0-based list index.
+            index: int = int(match.resolution.value) - 1
+        except (AttributeError, TypeError, ValueError, OverflowError):
+            # Best-effort, as in the dotnet/node repos: a value that cannot be read as an integer
+            # (missing resolution, "1.5", None, ...) simply yields no match. Only conversion errors
+            # are swallowed so that interrupts and unrelated bugs still surface.
+            return
+        if 0 <= index < len(choices):
+            choice = choices[index]
+            matched.append(
+                ModelResult(
+                    start=match.start,
+                    end=match.end,
+                    type_name="choice",
+                    text=match.text,
+                    resolution=FoundChoice(value=choice.value, index=index, score=1.0),
+                )
+            )
+    @staticmethod
+    def _recognize_number(utterance: str, culture: str) -> list[ModelResult]:
+        """Parse `utterance` with the number model for `culture` and wrap each hit as a "choice" result."""
+        model: NumberModel = cast(
+            NumberModel, NumberRecognizer(culture).get_number_model(culture)
+        )
+        return list(
+            map(ChoiceRecognizers._found_choice_constructor, model.parse(utterance))  # type: ignore[arg-type]
+        )
+    @staticmethod
+    def _found_choice_constructor(value_model: ModelResult) -> ModelResult:
+        """
+        Convert one recognizer parse result into this package's `ModelResult`.
+        The recognized value is read with `resolution["value"]`, i.e. the incoming resolution is
+        accessed by key. The `index` of 0 is only a placeholder: `_match_choice_by_index()` derives
+        the real index from the recognized value afterwards.
+        """
+        return ModelResult(
+            start=value_model.start,
+            end=value_model.end,
+            type_name="choice",
+            text=value_model.text,
+            resolution=FoundChoice(
+                value=value_model.resolution["value"], index=0, score=1.0
+            ),
+        )

microsoft_agents/hosting/dialogs/choices/find.py ADDED Viewed

@@ -0,0 +1,242 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from typing import Callable
+from collections.abc import Iterable
+from .models.choice import Choice
+from .models.find_choices_options import FindChoicesOptions, FindValuesOptions
+from .models.found_choice import FoundChoice
+from .models.found_value import FoundValue
+from .models.model_result import ModelResult
+from .models.sorted_value import SortedValue
+from .models.token import Token
+from .tokenizer import Tokenizer
+class Find:
+    """Contains methods for matching user input against a list of choices"""
+    @staticmethod
+    def find_choices(
+        utterance: str,
+        choices: Iterable[str | Choice],
+        options: FindChoicesOptions | None = None,
+    ) -> list[ModelResult]:
+        """
+        Matches user input against a list of choices.
+        Every choice contributes its value (unless `no_value`), its action title (unless
+        `no_action`) and its synonyms as search terms; hits are mapped back to their choice.
+        Raises `TypeError` when `choices` is falsy.
+        """
+        if not choices:
+            raise TypeError("Find: choices cannot be None.")
+        opt = options or FindChoicesOptions()
+        # Wrap bare strings so that every entry is a Choice.
+        normalized = [
+            item if not isinstance(item, str) else Choice(value=item)
+            for item in choices
+        ]
+        # Collect every searchable term, tagged with the position of the choice that owns it so
+        # a hit can be traced back to that choice later.
+        search_terms: list[SortedValue] = []
+        for position, candidate in enumerate(normalized):
+            if not opt.no_value:
+                search_terms.append(SortedValue(value=candidate.value, index=position))
+            if candidate.action and candidate.action.title and not opt.no_action:
+                search_terms.append(
+                    SortedValue(value=candidate.action.title, index=position)
+                )
+            if candidate.synonyms is not None:
+                search_terms.extend(
+                    SortedValue(value=alias, index=position)
+                    for alias in candidate.synonyms
+                )
+        def to_choice_result(value_hit: ModelResult) -> ModelResult:
+            # Re-express a raw "value" hit in terms of the choice that owns the matched term.
+            found = value_hit.resolution
+            owner = normalized[found.index]
+            return ModelResult(
+                start=value_hit.start,
+                end=value_hit.end,
+                type_name="choice",
+                text=value_hit.text,
+                resolution=FoundChoice(
+                    value=owner.value,
+                    index=found.index,
+                    score=found.score,
+                    synonym=found.value,
+                ),
+            )
+        return [
+            to_choice_result(hit)
+            for hit in Find.find_values(utterance, search_terms, opt)
+        ]
+    @staticmethod
+    def find_values(
+        utterance: str,
+        values: list[SortedValue],
+        options: FindValuesOptions | None = None,
+    ) -> list[ModelResult]:
+        """
+        Searches the utterance for each of the given values.
+        Returns the non-overlapping hits (at most one per value index), with start/end expressed
+        as character positions and `text` filled in, ordered by position in the utterance.
+        """
+        # Longest values first, so that the longest value is searched for first.
+        by_length = list(values)
+        by_length.sort(key=lambda term: len(term.value), reverse=True)
+        opt = options or FindValuesOptions()
+        tokenizer: Callable[[str, str | None], list[Token]] = (
+            opt.tokenizer or Tokenizer.default_tokenizer
+        )
+        tokens = tokenizer(utterance, opt.locale)
+        max_distance = 2 if opt.max_token_distance is None else opt.max_token_distance
+        candidates: list[ModelResult] = []
+        for term in by_length:
+            # Collect every occurrence of this value.
+            # - To match "last one" in "the last time I chose the last one" the search is
+            #   restarted just past the end of the previous hit.
+            # - Start & end of each hit are token positions at this stage.
+            term_tokens = tokenizer(term.value.strip(), opt.locale)
+            cursor = 0
+            while cursor < len(tokens):
+                hit = Find._match_value(
+                    tokens,
+                    max_distance,
+                    opt,
+                    term.index,
+                    term.value,
+                    term_tokens,
+                    cursor,
+                )
+                if hit is None:
+                    break
+                candidates.append(hit)
+                cursor = hit.end + 1
+        # Best score first.
+        candidates.sort(key=lambda hit: hit.resolution.score, reverse=True)
+        # Drop hits for an index that was already reported and hits that reuse a token.
+        # - Token positions are translated to character positions here, and the "text"
+        #   field is populated from the utterance.
+        results: list[ModelResult] = []
+        seen_indexes = set()
+        claimed_tokens = set()
+        for hit in candidates:
+            span = range(hit.start, hit.end + 1)
+            if hit.resolution.index in seen_indexes:
+                continue
+            if any(position in claimed_tokens for position in span):
+                continue
+            seen_indexes.add(hit.resolution.index)
+            claimed_tokens.update(span)
+            first_token = tokens[hit.start]
+            last_token = tokens[hit.end]
+            hit.start = first_token.start
+            hit.end = last_token.end
+            # Token.end is the index of the token's last character, hence the inclusive slice.
+            hit.text = utterance[hit.start : hit.end + 1]
+            results.append(hit)
+        # Hand the survivors back in utterance order.
+        results.sort(key=lambda hit: hit.start)
+        return results
+    @staticmethod
+    def _match_value(
+        source_tokens: list[Token],
+        max_distance: int,
+        options: FindValuesOptions,
+        index: int,
+        value: str,
+        searched_tokens: list[Token],
+        start_pos: int,
+    ) -> ModelResult | None:
+        """
+        Tries to match the tokens of one value against the utterance tokens from `start_pos`.
+        Returns a "value" result whose start/end are token positions (the caller converts them
+        and fills in `text`), or `None` when nothing acceptable was matched.
+        """
+        # The tokens are matched in order, so "second last" will match in
+        # "the second from last one" but not in "the last from the second one".
+        # The total deviation counts the utterance tokens skipped between matched tokens; for
+        # the example above 2 tokens are matched and the total deviation is 1.
+        hit_count = 0
+        skipped = 0
+        first = -1
+        last = -1
+        for wanted in searched_tokens:
+            found_at = Find._index_of_token(source_tokens, wanted, start_pos)
+            if found_at < 0:
+                continue
+            # Gap between this token and the one matched before it (no gap for the first one).
+            gap = found_at - start_pos if hit_count > 0 else 0
+            if gap > max_distance:
+                continue
+            hit_count += 1
+            skipped += gap
+            # The next token is searched for after the one just matched.
+            start_pos = found_at + 1
+            if first < 0:
+                first = found_at
+            last = found_at
+        if hit_count == 0:
+            return None
+        if hit_count != len(searched_tokens) and not options.allow_partial_matches:
+            return None
+        # Fraction of the value's tokens that were found. Matching "second last" in
+        # "the second from last one" gives a completeness of 1.0 since all tokens were found.
+        completeness = hit_count / len(searched_tokens)
+        # The accuracy is reduced by utterance tokens that were skipped over between matched
+        # tokens: in the example above 2 tokens matched with 1 skipped, i.e. 2 / 3.
+        accuracy = float(hit_count) / (hit_count + skipped)
+        # The final score is simply the completeness multiplied by the accuracy.
+        return ModelResult(
+            text="",
+            start=first,
+            end=last,
+            type_name="value",
+            resolution=FoundValue(
+                value=value, index=index, score=completeness * accuracy
+            ),
+        )
+    @staticmethod
+    def _index_of_token(tokens: list[Token], token: Token, start_pos: int) -> int:
+        """Returns the first position at or after `start_pos` whose normalized text equals the token's, or -1."""
+        return next(
+            (
+                position
+                for position in range(start_pos, len(tokens))
+                if tokens[position].normalized == token.normalized
+            ),
+            -1,
+        )

microsoft_agents/hosting/dialogs/choices/models/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+from .choice_factory_options import ChoiceFactoryOptions
+from .choice import Choice
+from .find_choices_options import FindChoicesOptions
+from .find_values_options import FindValuesOptions
+from .found_choice import FoundChoice
+from .found_value import FoundValue
+from .list_style import ListStyle
+from .model_result import ModelResult
+from .sorted_value import SortedValue
+from .token import Token
+# Public API of the choices.models package: exactly the model classes imported above.
+__all__ = [
+    "ChoiceFactoryOptions",
+    "Choice",
+    "FindChoicesOptions",
+    "FindValuesOptions",
+    "FoundChoice",
+    "FoundValue",
+    "ListStyle",
+    "ModelResult",
+    "SortedValue",
+    "Token",
+]

microsoft_agents/hosting/dialogs/choices/models/choice.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass, field
+from microsoft_agents.activity import CardAction
+@dataclass
+class Choice:
+    """A single option that user input can be matched against.
+    `Find.find_choices()` searches `value`, the title of `action` (when set) and every entry
+    of `synonyms`; the first two can be switched off through `FindChoicesOptions`.
+    """
+    # Value reported back in `FoundChoice.value` when this choice is matched.
+    value: str = ""
+    # Optional card action; only its `title` is read by the matching code in this package.
+    action: CardAction | None = None
+    # Additional terms that should also match this choice. A fresh list per instance.
+    synonyms: list[str] = field(default_factory=list)

microsoft_agents/hosting/dialogs/choices/models/choice_factory_options.py ADDED Viewed

@@ -0,0 +1,13 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass
+@dataclass
+class ChoiceFactoryOptions:
+    """Formatting options for rendering a list of choices.
+    NOTE(review): the consumer of these options is not part of this diff section; the field
+    notes below are inferred from the names and should be confirmed against that code.
+    """
+    # Presumably the separator placed between choices in an inline list; `None` leaves the
+    # choice to the consumer - TODO confirm.
+    inline_separator: str | None = None
+    # Presumably the joiner used when there are exactly two choices - TODO confirm.
+    inline_or: str | None = None
+    # Presumably the joiner placed before the last of three or more choices - TODO confirm.
+    inline_or_more: str | None = None
+    # Defaults to True; presumably controls whether choices are prefixed with numbers - TODO confirm.
+    include_numbers: bool = True

microsoft_agents/hosting/dialogs/choices/models/find_choices_options.py ADDED Viewed

@@ -0,0 +1,28 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass
+from .find_values_options import FindValuesOptions
+@dataclass
+class FindChoicesOptions(FindValuesOptions):
+    """Contains options to control how input is matched against a list of choices.
+    Extends `FindValuesOptions`, so all of its search options apply here as well.
+    no_value: If `True`, the choices `value` field will NOT be searched over. Defaults to `False`.
+    no_action: If `True`, the choices `action.title` field will NOT be searched over.
+        Defaults to `False`.
+    recognize_numbers: Indicates whether the recognizer should check for Numbers using the
+        NumberRecognizer's NumberModel. Defaults to `True`.
+    recognize_ordinals: Indicates whether the recognizer should check for Ordinal Numbers using
+        the NumberRecognizer's OrdinalModel. Defaults to `True`.
+    """
+    no_value: bool = False
+    no_action: bool = False
+    recognize_numbers: bool = True
+    recognize_ordinals: bool = True

microsoft_agents/hosting/dialogs/choices/models/find_values_options.py ADDED Viewed

@@ -0,0 +1,31 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from collections.abc import Iterable
+from dataclasses import dataclass
+from typing import Callable
+from .token import Token
+@dataclass
+class FindValuesOptions:
+    """Contains search options, used to control how choices are recognized in a user's utterance.
+    allow_partial_matches: (Optional) If `True`, then only some of the tokens in a value need to exist to be considered
+    a match. The default value is `False`.
+    locale: (Optional) locale/culture code of the utterance. Default is `en-US`.
+    max_token_distance: (Optional) maximum tokens allowed between two matched tokens in the utterance. So with
+    a max distance of 2 the value "second last" would match the utterance "second from the last"
+    but it wouldn't match "Wait a second. That's not the last one is it?".
+    The default value is "2".
+    tokenizer: (Optional) Tokenizer to use when parsing the utterance and values being recognized.
+    It is called with the text and the locale and returns a list of `Token`. The default is `None`.
+    """
+    allow_partial_matches: bool = False
+    locale: str = "en-US"
+    max_token_distance: int = 2
+    tokenizer: Callable[[str, str | None], list[Token]] | None = None

microsoft_agents/hosting/dialogs/choices/models/found_choice.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass
+@dataclass
+class FoundChoice:
+    """Represents a result from matching user input against a list of choices.
+    value: The value of the choice that was matched.
+    index: The zero-based index of the choice that was matched.
+    score: The accuracy with which the synonym matched the specified portion of the utterance.
+    A value of 1.0 would indicate a perfect match.
+    synonym: The synonym that was matched in case of a synonym match. Defaults to `None`.
+    """
+    value: str
+    index: int
+    score: float
+    synonym: str | None = None

microsoft_agents/hosting/dialogs/choices/models/found_value.py ADDED Viewed

@@ -0,0 +1,20 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass
+@dataclass
+class FoundValue:
+    """Represents a result from matching user input against a list of values.
+    value: The value that was matched.
+    index: The index of the value that was matched.
+    score: The accuracy with which the value matched the specified portion of the utterance.
+    A value of 1.0 would indicate a perfect match.
+    """
+    value: str
+    index: int
+    score: float

microsoft_agents/hosting/dialogs/choices/models/list_style.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from enum import Enum
+class ListStyle(int, Enum):
+    """Defines the style of list to present choices to the user.
+    Members are also `int` instances, so they compare equal to their numeric values.
+    NOTE(review): the code that renders each style is not part of this diff section; the
+    meaning of each member is inferred from its name and should be confirmed there.
+    """
+    none = 0
+    auto = 1
+    in_line = 2
+    list_style = 3
+    suggested_action = 4
+    hero_card = 5

microsoft_agents/hosting/dialogs/choices/models/model_result.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass
+from typing import Any
+@dataclass
+class ModelResult:
+    """Contains recognition result information."""
+    # Portion of the utterance that was matched.
+    text: str
+    # Position where the match begins.
+    start: int
+    # Position where the match ends.
+    end: int
+    # Kind of result; the code in this package produces "value" and "choice".
+    type_name: str
+    # Details of what was recognized, e.g. a FoundValue or a FoundChoice.
+    resolution: Any

microsoft_agents/hosting/dialogs/choices/models/sorted_value.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass
+@dataclass
+class SortedValue:
+    """A value that can be sorted and still refer to its original position within a source array.
+    value: the value that will be sorted.
+    index: the value's original position within its unsorted array.
+    """
+    value: str
+    index: int

microsoft_agents/hosting/dialogs/choices/models/token.py ADDED Viewed

@@ -0,0 +1,20 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+from dataclasses import dataclass
+@dataclass
+class Token:
+    """Represents an individual token, such as a word in an input string.
+    start: The index of the first character of the token within the outer input string.
+    end: The index of the last character of the token within the outer input string (inclusive).
+    text: The original text of the token.
+    normalized: A normalized version of the token. This can include things like lower casing or stemming.
+    May be `None`.
+    """
+    start: int
+    end: int
+    text: str
+    normalized: str | None