PyPI - ovos-adapt-parser - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ovos-adapt-parser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

ovos_adapt/__init__.py +21 -0
ovos_adapt/context.py +154 -0
ovos_adapt/engine.py +490 -0
ovos_adapt/entity_tagger.py +120 -0
ovos_adapt/expander.py +309 -0
ovos_adapt/intent.py +329 -0
ovos_adapt/opm.py +360 -0
ovos_adapt/parser.py +90 -0
ovos_adapt/tools/__init__.py +17 -0
ovos_adapt/tools/debug/__init__.py +70 -0
ovos_adapt/tools/text/__init__.py +16 -0
ovos_adapt/tools/text/tokenizer.py +98 -0
ovos_adapt/tools/text/trie.py +302 -0
ovos_adapt/version.py +6 -0
ovos_adapt_parser-0.1.0.dist-info/LICENSE.md +211 -0
ovos_adapt_parser-0.1.0.dist-info/METADATA +118 -0
ovos_adapt_parser-0.1.0.dist-info/RECORD +20 -0
ovos_adapt_parser-0.1.0.dist-info/WHEEL +5 -0
ovos_adapt_parser-0.1.0.dist-info/entry_points.txt +3 -0
ovos_adapt_parser-0.1.0.dist-info/top_level.txt +1 -0

ovos_adapt/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+# Copyright 2018 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+__author__ = 'seanfitz'
+import os
+# Expose the README as the package docstring when a README.md exists. The path
+# is relative, so the lookup happens in the current working directory.
+if os.path.exists('README.md'):
+  import codecs
+  # A context manager closes the file handle as soon as the text is read;
+  # the previous bare codecs.open(...).read() left it to the garbage collector.
+  with codecs.open('README.md', encoding='utf-8', mode='r') as _readme:
+    __doc__ = _readme.read()

ovos_adapt/context.py ADDED Viewed

@@ -0,0 +1,154 @@
+# Copyright 2018 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Context Management code for Adapt (where context ~= persistent session state).
+"""
+from six.moves import xrange
+__author__ = "seanfitz, Art McGee"
+class ContextManagerFrame(object):
+    """
+    One frame of conversational context.
+    A frame groups the entities injected during a stretch of conversation with
+    the metadata describing that stretch, and answers equality-based metadata
+    queries.
+    Attributes:
+        entities(list): entities stored in this frame
+        metadata(dict): metadata describing the context held by this frame
+    """
+    def __init__(self, entities=None, metadata=None):
+        """
+        Create a frame.
+        Args:
+            entities(list): initial entities; any falsy value yields a new empty list
+            metadata(dict): initial metadata; any falsy value yields a new empty dict
+        """
+        self.entities = entities if entities else []
+        self.metadata = metadata if metadata else {}
+    def metadata_matches(self, query=None):
+        """
+        Check whether query is a non-empty subset of this frame's metadata.
+        Every key in query must map to a value equal to the value stored under
+        the same key in self.metadata; a key missing from self.metadata is read
+        as None.
+        Args:
+            query(dict): metadata to match against this frame
+        Returns:
+            bool:
+                True: query has at least one key and every value in query
+                    equals the corresponding value in self.metadata
+                False: query is empty/None or at least one value differs
+        """
+        wanted = query if query else {}
+        # An empty query never matches.
+        if len(wanted.keys()) == 0:
+            return False
+        for name in wanted.keys():
+            if not wanted[name] == self.metadata.get(name):
+                return False
+        return True
+    def merge_context(self, tag, metadata):
+        """
+        Merge a new entity and its metadata into this frame.
+        The entity is appended to self.entities. Keys from metadata are added to
+        self.metadata only when they are not already present, so existing
+        values are never overwritten.
+        Args:
+            tag(object): entity to append to self.entities
+            metadata(dict): metadata whose new keys are added to self.metadata
+        """
+        self.entities.append(tag)
+        for name, value in metadata.items():
+            self.metadata.setdefault(name, value)
+class ContextManager(object):
+    """
+    Tracks context throughout the course of a conversational session.
+    Frames are kept in self.frame_stack with the most recent frame at index 0.
+    How to manage a session's lifecycle is not captured here.
+    """
+    def __init__(self):
+        self.frame_stack = []
+    def inject_context(self, entity, metadata=None):
+        """
+        Add an entity to the current context.
+        If metadata matches the top of the context frame stack, the entity is
+        merged into that frame. Otherwise a new frame is created and pushed on
+        top of the stack.
+        Args:
+            entity(object):
+                format {'data': 'Entity tag as <str>',
+                        'key': 'entity proper name as <str>',
+                        'confidence': <float>
+                        }
+            metadata(object): dict, arbitrary metadata about the entity being added
+        """
+        metadata = metadata or {}
+        top_frame = self.frame_stack[0] if self.frame_stack else None
+        if top_frame is not None and top_frame.metadata_matches(metadata):
+            top_frame.merge_context(entity, metadata)
+            return
+        # Copy the metadata so later merges into the frame do not mutate the
+        # caller's dict.
+        frame = ContextManagerFrame(entities=[entity], metadata=metadata.copy())
+        self.frame_stack.insert(0, frame)
+    def get_context(self, max_frames=None, missing_entities=None):
+        """
+        Return context entities with confidence decayed by depth in the stack.
+        Each returned entity is a copy whose confidence (default 1.0) is divided
+        by 2.0 + depth, where depth 0 is the most recent frame. The stored
+        entities are left untouched.
+        Args:
+            max_frames(int): maximum number of frames to look back; a falsy
+                value or one larger than the stack means all frames
+            missing_entities(list of str): a list or set of tag names, as strings
+        Returns:
+            list: a list of entities. When missing_entities is non-empty, only
+                entities whose 'data' is listed are returned, at most one per
+                occurrence of that tag in missing_entities.
+        """
+        # Work on a private list because matched tags are removed below.
+        missing_entities = list(missing_entities or [])
+        if not max_frames or max_frames > len(self.frame_stack):
+            max_frames = len(self.frame_stack)
+        context = []
+        # Builtin range is enough here: the package ships as a py3-only wheel.
+        for depth in range(max_frames):
+            for entity in self.frame_stack[depth].entities:
+                weighted = entity.copy()
+                weighted['confidence'] = weighted.get('confidence', 1.0) / (2.0 + depth)
+                context.append(weighted)
+        if not missing_entities:
+            return context
+        result = []
+        for entity in context:
+            tag = entity.get('data')
+            if tag in missing_entities:
+                result.append(entity)
+                # NOTE: this implies that we will only ever get one
+                # of an entity kind from context, unless specified
+                # multiple times in missing_entities. Cannot get
+                # an arbitrary number of an entity kind.
+                missing_entities.remove(tag)
+        return result

ovos_adapt/engine.py ADDED Viewed

@@ -0,0 +1,490 @@
+# Copyright 2018 Mycroft AI Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import re
+import heapq
+from ovos_adapt.entity_tagger import EntityTagger
+from ovos_adapt.parser import Parser
+from ovos_adapt.tools.text.tokenizer import EnglishTokenizer
+from ovos_adapt.tools.text.trie import Trie
+__author__ = 'seanfitz'
+class IntentDeterminationEngine(object):
+    """
+    IntentDeterminationEngine
+    The IntentDeterminationEngine is a greedy and naive implementation of intent determination. Given an utterance,
+    it uses the Adapt parsing tools to come up with a sorted collection of tagged parses. A valid parse result contains
+    no overlapping tagged entities, and its confidence is the sum of the tagged entity confidences, which are
+    weighted based on the percentage of the utterance (per character) that the entity match represents.
+    This system makes heavy use of generators to enable greedy algorithms to short circuit large portions of
+    computation, however making use of context or regular expressions prevents these optimizations.
+    Attributes:
+        tokenizer: tokenizer used to break up utterances
+        trie: trie holding the registered entities
+        regular_expressions_entities(list): compiled regular-expression entities
+        intent_parsers(list): registered intent parsers
+    """
+    def __init__(self, tokenizer=None, trie=None):
+        """
+        Initialize the IntentDeterminationEngine
+        Args:
+            tokenizer(tokenizer) : tokenizer used to break up spoken text
+                example EnglishTokenizer(); a falsy value creates an EnglishTokenizer
+            trie(Trie): tree of matches to Entities; a falsy value creates a Trie
+        """
+        self.tokenizer = tokenizer or EnglishTokenizer()
+        self.trie = trie or Trie()
+        self.regular_expressions_entities = []
+        # Source strings of the registered regexes, used to skip duplicates.
+        self._regex_strings = set()
+        self.intent_parsers = []
+    def __best_intent(self, parse_result, context=None):
+        """
+        For the specified parse_result, find the intent parser with the
+        highest confidence match.
+        Args:
+            parse_result(dict): parse result providing 'tags' and 'confidence'
+            context(list): context entities (dicts with a 'key') offered to the
+                parsers in addition to the parsed tags; None means no context
+        Returns:
+            best_intent, best_tags:
+                best_intent : The best intent for given results
+                best_tags : The Tags for result
+        """
+        # None instead of a mutable [] default, which is shared across calls.
+        context = context or []
+        best_intent = None
+        best_tags = None
+        # TODO: there's a bunch of subtlety here around what the values of `match` and `key` should be
+        # Longer term, this should probably be typed, barring any performance regressions.
+        context_as_entities = [
+            {
+                'key': c['key'],
+                'entities': [c],
+                'from_context': True
+            } for c in context
+        ]
+        for intent in self.intent_parsers:
+            # A fresh tag list is built for every parser so that a parser which
+            # mutates its input cannot affect the next one.
+            i, tags = intent.validate_with_tags(
+                parse_result.get('tags') + context_as_entities,
+                parse_result.get('confidence'))
+            if not best_intent or (i and i.get('confidence') > best_intent.get('confidence')):
+                best_intent = i
+                best_tags = tags
+        return best_intent, best_tags
+    def __get_unused_context(self, parse_result, context):
+        """ Used to get unused context from context.  Any keys not in
+        parse_result
+        Args:
+            parse_result(dict): parsed result whose 'tags' flagged
+                'from_context' identify which context keys were used.
+            context(list): this is the context used to match with parsed results
+                keys missing in the parsed results are the unused context
+        Returns:
+            list: A list of the unused context results.
+        """
+        tags_keys = {t['key'] for t in parse_result['tags'] if t['from_context']}
+        return [c for c in context if c['key'] not in tags_keys]
+    @property
+    def tagger(self):
+        """
+        Build an EntityTagger over the current trie, tokenizer and regex entities.
+        A new tagger is created on every access.
+        """
+        return EntityTagger(self.trie, self.tokenizer,
+                            self.regular_expressions_entities)
+    def determine_intent(self, utterance, num_results=1, include_tags=False, context_manager=None):
+        """
+        Given an utterance, provide a valid intent.
+        Args:
+            utterance(str): an ascii or unicode string representing natural language speech
+            num_results(int): a maximum number of results to be returned.
+            include_tags(bool): includes the parsed tags (including position and confidence)
+                as part of result
+            context_manager(object): optional object exposing get_context(), used to
+                provide context to the utterance
+        Returns: A generator that yields dictionaries.
+        """
+        parser = Parser(self.tokenizer, self.tagger)
+        context = context_manager.get_context() if context_manager else []
+        # Adapt consumers assume that results are sorted by confidence. parser
+        # will yield results sorted by utterance coverage, but regex
+        # and context entities will have different weights, and
+        # can influence final sorting.
+        requires_final_sort = self.regular_expressions_entities or context
+        def generate_intents():
+            for result in parser.parse(utterance, N=num_results, context=context):
+                # create a context without entities used in result
+                remaining_context = self.__get_unused_context(result, context)
+                best_intent, tags = self.__best_intent(result, remaining_context)
+                if best_intent and best_intent.get('confidence', 0.0) > 0:
+                    if include_tags:
+                        best_intent['__tags__'] = tags
+                    yield best_intent
+        if requires_final_sort:
+            # sorted() is stable, so equally confident intents keep parse order.
+            sorted_iterable = sorted(
+                generate_intents(),
+                key=lambda x: x.get('confidence', 0.0),
+                reverse=True)
+        else:
+            sorted_iterable = generate_intents()
+        for intent in sorted_iterable:
+            yield intent
+    def register_entity(self, entity_value, entity_type, alias_of=None):
+        """
+        Register an entity to be tagged in potential parse results
+        Args:
+            entity_value(str): the value/proper name of an entity instance (Ex: "The Big Bang Theory")
+            entity_type(str): the type/tag of an entity instance (Ex: "Television Show")
+            alias_of(str): when given, entity_value is stored as an alias that
+                resolves to this value, and no 'Concept' entry is added for the type
+        """
+        if alias_of:
+            self.trie.insert(entity_value.lower(), data=(alias_of, entity_type))
+        else:
+            self.trie.insert(entity_value.lower(), data=(entity_value, entity_type))
+            self.trie.insert(entity_type.lower(), data=(entity_type, 'Concept'))
+    def register_regex_entity(self, regex_str):
+        """
+        A regular expression making use of python named group expressions.
+        Example: (?P<Artist>.*)
+        The expression is compiled case-insensitively. Empty strings and
+        expressions that were already registered are ignored.
+        Args:
+            regex_str(str): a string representing a regular expression as defined above
+        """
+        if regex_str and regex_str not in self._regex_strings:
+            self._regex_strings.add(regex_str)
+            self.regular_expressions_entities.append(re.compile(regex_str, re.IGNORECASE))
+    def register_intent_parser(self, intent_parser):
+        """
+        "Enforce" the intent parser interface at registration time.
+        Args:
+            intent_parser(intent): Intent to be registered.
+        Raises:
+            ValueError: on invalid intent
+        """
+        # NOTE(review): only `validate` is checked here, while intent matching in
+        # this class calls `validate_with_tags` -- confirm which is the contract.
+        if hasattr(intent_parser, 'validate') and callable(intent_parser.validate):
+            self.intent_parsers.append(intent_parser)
+        else:
+            raise ValueError("%s is not an intent parser" % str(intent_parser))
+    def drop_intent_parser(self, parser_names):
+        """Drop a registered intent parser.
+        Arguments:
+            parser_names (str or iterable): parser name to drop or list of
+                                            names
+        Returns:
+            (bool) True if a parser was dropped else False
+        """
+        if isinstance(parser_names, str):
+            parser_names = [parser_names]
+        num_original_parsers = len(self.intent_parsers)
+        self.intent_parsers = [p for p in self.intent_parsers
+                               if p.name not in parser_names]
+        return len(self.intent_parsers) != num_original_parsers
+    def drop_entity(self, entity_type=None, match_func=None):
+        """Drop all entities matching the given entity type or match function
+        Arguments:
+            entity_type (str): entity name to match against
+            match_func (callable): match function to find entities; when given
+                it is used instead of the entity_type comparison
+        Returns:
+            (bool) True if vocab was found and removed otherwise False.
+        """
+        def default_match_func(data):
+            # Trie data is registered as (value, entity_type) tuples.
+            return data and data[1] == entity_type
+        ent_tuples = self.trie.scan(match_func or default_match_func)
+        for entity in ent_tuples:
+            self.trie.remove(*entity)
+        return len(ent_tuples) != 0
+    def drop_regex_entity(self, entity_type=None, match_func=None):
+        """Remove registered regex entity.
+        Arguments:
+            entity_type (str): entity name to match against; a regex matches
+                when it defines a named group with this name
+            match_func (callable): match function to find entities; it receives
+                each compiled regex and is used instead of entity_type
+        Returns:
+            (bool) True if vocab was found and removed otherwise False.
+        """
+        def default_match_func(regexp):
+            return entity_type in regexp.groupindex
+        match_func = match_func or default_match_func
+        kept = []
+        dropped_patterns = set()
+        for regexp in self.regular_expressions_entities:
+            if match_func(regexp):
+                dropped_patterns.add(regexp.pattern)
+            else:
+                kept.append(regexp)
+        dropped_any = len(kept) != len(self.regular_expressions_entities)
+        # Slice-assign so the list object is mutated in place, as before;
+        # other holders of a reference to the list keep seeing current content.
+        self.regular_expressions_entities[:] = kept
+        self._regex_strings = self._regex_strings - dropped_patterns
+        return dropped_any
+class DomainIntentDeterminationEngine(object):
+    """
+    DomainIntentDeterminationEngine.
+    The DomainIntentDeterminationEngine is a greedy and naive implementation of intent
+    determination. Given an utterance, it uses the Adapt parsing tools to come up with a
+    sorted collection of tagged parses. A valid parse result contains no overlapping
+    tagged entities in a single domain, and its confidence is the sum of the tagged
+    entity confidences, which are weighted based on the percentage of the utterance
+    (per character) that the entity match represents.
+    This system makes heavy use of generators to enable greedy algorithms to short circuit
+    large portions of computation.
+    Attributes:
+        domains(dict): maps a domain identifier to its IntentDeterminationEngine
+    """
+    def __init__(self):
+        """
+        Initialize DomainIntentDeterminationEngine.
+        """
+        self.domains = {}
+    def _get_or_register_domain(self, domain=0):
+        """Return the engine for domain, registering a default engine first if it is unknown."""
+        if domain not in self.domains:
+            self.register_domain(domain=domain)
+        return self.domains[domain]
+    @property
+    def tokenizer(self):
+        """
+        A property to link into IntentEngine's tokenizer.
+        Warning: this is only for backwards compatibility and should not be used if you
+            intend on using domains.
+        Return: the tokenizer of the default domain (0), which is registered on demand
+        """
+        return self._get_or_register_domain().tokenizer
+    @property
+    def trie(self):
+        """
+        A property to link into IntentEngine's trie.
+        Warning: this is only for backwards compatibility and should not be used if you
+            intend on using domains.
+        Return: the trie of the default domain (0), which is registered on demand
+        """
+        return self._get_or_register_domain().trie
+    @property
+    def tagger(self):
+        """
+        A property to link into IntentEngine's tagger.
+        Warning: this is only for backwards compatibility and should not be used if you
+            intend on using domains.
+        Return: the tagger of the default domain (0), which is registered on demand
+        """
+        return self._get_or_register_domain().tagger
+    @property
+    def intent_parsers(self):
+        """
+        A property to link into IntentEngine's intent_parsers.
+        Warning: this is only for backwards compatibility and should not be used if you
+            intend on using domains.
+        Returns: the intent_parsers of the default domain (0), which is registered on demand
+        """
+        return self._get_or_register_domain().intent_parsers
+    @property
+    def _regex_strings(self):
+        """
+        A property to link into IntentEngine's _regex_strings.
+        Warning: this is only for backwards compatibility and should not be used if you
+            intend on using domains.
+        Returns: the _regex_strings of the default domain (0), which is registered on demand
+        """
+        return self._get_or_register_domain()._regex_strings
+    @property
+    def regular_expressions_entities(self):
+        """
+        A property to link into IntentEngine's regular_expressions_entities.
+        Warning: this is only for backwards compatibility and should not be used if you
+            intend on using domains.
+        Returns: the regular_expressions_entities of the default domain (0), which is
+            registered on demand
+        """
+        return self._get_or_register_domain().regular_expressions_entities
+    def register_domain(self, domain=0, tokenizer=None, trie=None):
+        """
+        Register a domain with the intent engine.
+        Any engine already registered under the same domain is replaced.
+        Args:
+            domain: identifier of the domain you wish to add (defaults to 0)
+            tokenizer(tokenizer): The tokenizer you wish to use.
+            trie(Trie): the Trie() you wish to use.
+        """
+        self.domains[domain] = IntentDeterminationEngine(
+            tokenizer=tokenizer, trie=trie)
+    def register_entity(self, entity_value, entity_type, alias_of=None, domain=0):
+        """
+        Register an entity to be tagged in potential parse results.
+        The domain is registered on demand when it does not exist yet.
+        Args:
+            entity_value(str): the value/proper name of an entity instance
+                (Ex: "The Big Bang Theory")
+            entity_type(str): the type/tag of an entity instance (Ex: "Television Show")
+            alias_of(str): forwarded unchanged to the domain engine's register_entity
+            domain: identifier of the domain you wish to add the entity to
+        """
+        self._get_or_register_domain(domain).register_entity(
+            entity_value=entity_value,
+            entity_type=entity_type,
+            alias_of=alias_of)
+    def register_regex_entity(self, regex_str, domain=0):
+        """
+        A regular expression making use of python named group expressions.
+        Example: (?P<Artist>.*)
+        The domain is registered on demand when it does not exist yet.
+        Args:
+            regex_str(str): a string representing a regular expression as defined above
+            domain: identifier of the domain you wish to add the entity to
+        """
+        self._get_or_register_domain(domain).register_regex_entity(
+            regex_str=regex_str)
+    def determine_intent(self, utterance, num_results=1):
+        """
+        Given an utterance, provide a valid intent.
+        Every registered domain is asked for up to num_results intents; the
+        num_results most confident ones overall are yielded, best first.
+        Args:
+            utterance(str): an ascii or unicode string representing natural language speech
+            num_results(int): a maximum number of results to be returned.
+        Returns: A generator that yields dictionaries.
+        """
+        intents = []
+        for engine in self.domains.values():
+            intents.extend(engine.determine_intent(utterance=utterance,
+                                                   num_results=num_results))
+        # Iterate over the value returned by nlargest: it does not sort or trim
+        # `intents` in place, so discarding it would yield every intent unranked.
+        best_intents = heapq.nlargest(
+            num_results, intents, key=lambda intent: intent['confidence'])
+        for intent in best_intents:
+            yield intent
+    def register_intent_parser(self, intent_parser, domain=0):
+        """
+        Register a intent parser with a domain.
+        The domain is registered on demand when it does not exist yet.
+        Args:
+            intent_parser(intent): The intent parser you wish to register.
+            domain: identifier of the domain you wish register the intent
+                parser to.
+        """
+        self._get_or_register_domain(domain).register_intent_parser(
+            intent_parser=intent_parser)
+    def drop_intent_parser(self, parser_names, domain):
+        """Drop a registered intent parser.
+        Arguments:
+            parser_names (list, str): parser names to drop.
+            domain: domain to drop from
+        Returns:
+            (bool) True if an intent parser was dropped else false.
+        Raises:
+            KeyError: if domain has not been registered
+        """
+        return self.domains[domain].drop_intent_parser(parser_names)
+    def drop_entity(self, domain, entity_type=None, match_func=None):
+        """Drop all entities matching the given entity type or match function.
+        Arguments:
+            domain: intent domain
+            entity_type (str): entity name to match against
+            match_func (callable): match function to find entities
+        Returns:
+            (bool) True if vocab was found and removed otherwise False.
+        Raises:
+            KeyError: if domain has not been registered
+        """
+        return self.domains[domain].drop_entity(entity_type=entity_type,
+                                                match_func=match_func)
+    def drop_regex_entity(self, domain, entity_type=None, match_func=None):
+        """Remove registered regex entity.
+        Arguments:
+            domain: intent domain
+            entity_type (str): entity name to match against
+            match_func (callable): match function to find entities
+        Returns:
+            (bool) True if vocab was found and removed otherwise False.
+        Raises:
+            KeyError: if domain has not been registered
+        """
+        return self.domains[domain].drop_regex_entity(entity_type=entity_type,
+                                                      match_func=match_func)