PyPI - nlptoolkit-dictionary - Versions diffs - 1.0.36__tar.gz - Mend

nlptoolkit-dictionary 1.0.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

nlptoolkit_dictionary-1.0.36/Dictionary/Dictionary.py ADDED Viewed

@@ -0,0 +1,229 @@
+from Dictionary.Word import Word
class Dictionary:
    """
    A list of Word objects that is searched with binary search.

    None of the methods below sort ``words``; every lookup assumes the list is already ordered
    according to ``comparator``.
    """

    words: list  # Word objects, expected to be ordered by ``comparator``
    filename: str  # initialised to ""; NOTE(review): presumably set by subclasses that load a file

    def __init__(self, comparator=None):
        """
        Creates an empty dictionary.
        PARAMETERS
        ----------
        comparator : callable, optional
            Function taking two words and returning a negative, zero or positive integer. When omitted,
            turkishLowerCaseComparator is used.
        """
        self.words = []
        self.filename = ""
        if comparator is None:
            self.comparator = Dictionary.turkishLowerCaseComparator
        else:
            self.comparator = comparator

    @staticmethod
    def __compareByAlphabet(alphabet: str, wordA: "Word", wordB: "Word") -> int:
        """
        Shared implementation of the two Turkish comparators.

        Names are compared character by character. A character found in ``alphabet`` ranks by its position
        there and sorts before any character that is not in ``alphabet``. Two different characters that are
        both outside ``alphabet`` are ordered by code point, so two distinct names never compare as equal.
        If one name is a prefix of the other, the shorter name sorts first.
        PARAMETERS
        ----------
        alphabet : str
            Letters in their collation order; each letter appears once.
        wordA : Word
            First word to compare.
        wordB : Word
            Second word to compare.
        RETURNS
        -------
        int
            -1, 0 or 1.
        """
        name_a = wordA.getName()
        name_b = wordB.getName()
        for char_a, char_b in zip(name_a, name_b):
            if char_a == char_b:
                continue
            # str.find returns -1 for characters that are not part of the alphabet.
            rank_a = alphabet.find(char_a)
            rank_b = alphabet.find(char_b)
            if rank_a >= 0 and rank_b >= 0:
                return -1 if rank_a < rank_b else 1
            if rank_a >= 0:
                return -1
            if rank_b >= 0:
                return 1
            # Neither character is in the alphabet: fall back to code point order instead of
            # skipping the position, otherwise e.g. "A" and "B" would be treated as the same word.
            return -1 if char_a < char_b else 1
        if len(name_a) < len(name_b):
            return -1
        if len(name_a) > len(name_b):
            return 1
        return 0

    @staticmethod
    def turkishLowerCaseComparator(wordA: "Word", wordB: "Word"):
        """
        Compares two words in a case-sensitive manner using the Turkish lowercase alphabet.

        Lowercase Turkish letters are ordered as in "abcçdefgğhıijklmnoöprsştuüvyz" and sort before every
        other character; other characters are ordered among themselves by code point.
        PARAMETERS
        ----------
        wordA : Word
            First word to compare.
        wordB : Word
            Second word to compare.
        RETURNS
        -------
        int
            0 if the names are equal, a value less than 0 if wordA sorts before wordB, and a value greater
            than 0 if wordA sorts after wordB.
        """
        LOWERCASE_LETTERS = "abcçdefgğhıijklmnoöprsştuüvyz"
        return Dictionary.__compareByAlphabet(LOWERCASE_LETTERS, wordA, wordB)

    @staticmethod
    def turkishIgnoreCaseComparator(wordA: "Word", wordB: "Word"):
        """
        Compares two words using a Turkish alphabet in which every lowercase letter is immediately followed
        by its uppercase form ("aAbBcCçÇ...").

        Case is therefore a tie-breaker within a letter rather than ignored: "a" sorts before "A", and "A"
        sorts before "b". Characters outside this alphabet sort after all letters and are ordered among
        themselves by code point.
        PARAMETERS
        ----------
        wordA : Word
            First word to compare.
        wordB : Word
            Second word to compare.
        RETURNS
        -------
        int
            0 if the names are equal, a value less than 0 if wordA sorts before wordB, and a value greater
            than 0 if wordA sorts after wordB.
        """
        IGNORE_CASE_LETTERS = "aAbBcCçÇdDeEfFgGğĞhHıIiİjJkKlLmMnNoOöÖpPrRsSşŞtTuUüÜvVyYzZ"
        return Dictionary.__compareByAlphabet(IGNORE_CASE_LETTERS, wordA, wordB)

    def getWord(self, name: str) -> "Word | None":
        """
        Looks up the word with the given name by binary search.
        PARAMETERS
        ----------
        name : str
            Name of the word to find.
        RETURNS
        -------
        Word
            The stored word at the found index, or None if no word compares equal to the name.
        """
        middle = self.__getPosition(Word(name))
        if middle >= 0:
            return self.words[middle]
        return None

    def getWordIndex(self, name: str) -> int:
        """
        Looks up the index of the word with the given name by binary search.
        PARAMETERS
        ----------
        name : str
            Name of the word to find.
        RETURNS
        -------
        int
            Index of the word in the words list (0 or greater), or -1 if it is not present.
        """
        middle = self.__getPosition(Word(name))
        return middle if middle >= 0 else -1

    def removeWord(self, name: str):
        """
        Removes the word with the given name; does nothing if no such word is stored.
        PARAMETERS
        ----------
        name : str
            Name of the word to be removed.
        """
        index = self.getWordIndex(name)
        if index != -1:
            self.words.pop(index)

    def size(self) -> int:
        """
        Returns the number of words in the words list.
        RETURNS
        -------
        int
            The size of the words list.
        """
        return len(self.words)

    def getWordWithIndex(self, index: int) -> "Word":
        """
        Returns the word stored at the given index of the words list.
        PARAMETERS
        ----------
        index : int
            Index into the words list; standard list indexing rules apply.
        RETURNS
        -------
        Word
            The value at the given index of the words list.
        """
        return self.words[index]

    def longestWordSize(self) -> int:
        """
        Returns the length of the longest word name in the words list.
        RETURNS
        -------
        int
            The maximum name length, or 0 when the list is empty.
        """
        return max((len(word.getName()) for word in self.words), default=0)

    def __getPosition(self, word: "Word") -> int:
        """
        Binary search for the given word in the words list.
        PARAMETERS
        ----------
        word : Word
            Searched word.
        RETURNS
        -------
        int
            The index of the word if it is found; otherwise (-(insertion point) - 1), where the insertion
            point is the index at which the word would have to be inserted to keep the list ordered.
        """
        lo = 0
        hi = len(self.words) - 1
        while lo <= hi:
            mid = (lo + hi) // 2
            # Compare once per probe; the result decides which half to keep.
            order = self.comparator(self.words[mid], word)
            if order < 0:
                lo = mid + 1
            elif order > 0:
                hi = mid - 1
            else:
                return mid
        return -(lo + 1)

    def getWordStartingWith(self, _hash: str) -> int:
        """
        Finds the position of the given string in the words list by binary search.
        PARAMETERS
        ----------
        _hash : str
            String to search for.
        RETURNS
        -------
        int
            Index of the matching word if there is one; otherwise the insertion point, i.e. the index at
            which a word with this name would be inserted. The result is never negative.
        """
        middle = self.__getPosition(Word(_hash))
        if middle < 0:
            # Decode the (-(insertion point) - 1) encoding back to the insertion point.
            return -middle - 1
        return middle

    def __repr__(self):
        return f"{self.words}"

nlptoolkit_dictionary-1.0.36/Dictionary/ExceptionalWord.py ADDED Viewed

@@ -0,0 +1,52 @@
+from Dictionary.Word import Word
+from Dictionary.Pos import Pos
class ExceptionalWord(Word):
    """
    A Word that additionally stores a root string and a part of speech.
    """

    __root: str
    __pos: Pos

    def __init__(self, name: str, root: str, pos: Pos):
        """
        Builds an exceptional word: the name is handed to the Word constructor, while root and pos are kept
        on this object.
        PARAMETERS
        ----------
        name : str
            Name passed on to the Word base class.
        root : str
            Root stored for this word.
        pos : Pos
            Part of speech stored for this word.
        """
        super().__init__(name)
        self.__pos = pos
        self.__root = root

    def getRoot(self) -> str:
        """
        Accessor for the stored root.
        RETURNS
        -------
        str
            The root given at construction time.
        """
        return self.__root

    def getPos(self) -> Pos:
        """
        Accessor for the stored part of speech.
        RETURNS
        -------
        Pos
            The part of speech given at construction time.
        """
        return self.__pos

    def __repr__(self):
        # NOTE(review): relies on the Word base class exposing a ``name`` attribute — not visible here.
        return "{} {} {}".format(self.name, self.__root, self.__pos)

nlptoolkit_dictionary-1.0.36/Dictionary/Pos.py ADDED Viewed

@@ -0,0 +1,40 @@
+from enum import Enum, auto
class Pos(Enum):
    """
    Parts of speech.

    Member values are generated by ``auto()`` in declaration order.
    """

    # Adjective.
    ADJECTIVE = auto()

    # Noun.
    NOUN = auto()

    # Verb.
    VERB = auto()

    # Adverb.
    ADVERB = auto()

    # Conjunction.
    CONJUNCTION = auto()

    # Interjection.
    INTERJECTION = auto()

    # Preposition.
    PREPOSITION = auto()

    # Pronoun.
    PRONOUN = auto()

nlptoolkit_dictionary-1.0.36/Dictionary/Trie/Trie.py ADDED Viewed

@@ -0,0 +1,88 @@
+from Dictionary.Trie.TrieNode import TrieNode
+from Dictionary.Word import Word
+from Dictionary.TxtWord import TxtWord
class Trie:
    """
    Character trie that delegates storage to a single root TrieNode.
    """

    __root_node: TrieNode

    def __init__(self):
        """
        Creates a trie whose root is a fresh TrieNode.
        """
        self.__root_node = TrieNode()

    def addWord(self,
                word: str,
                root: Word):
        """
        Registers the given root under the given word by forwarding both to the root node's addWord.
        PARAMETERS
        ----------
        word : str
            String that spells the path in the trie.
        root : Word
            Word to store at the end of that path.
        """
        self.__root_node.addWord(word, root)

    def getWordsWithPrefix(self, surfaceForm: str) -> set[Word]:
        """
        Walks the trie along the characters of surfaceForm and gathers the words stored at every node
        visited on the way. The walk stops at the first character that has no child node; the root node's
        own words are not included.
        PARAMETERS
        ----------
        surfaceForm : str
            String whose characters are followed from the root.
        RETURNS
        -------
        set
            Union of the word sets of all nodes reached.
        """
        collected = set()
        node = self.__root_node
        for ch in surfaceForm:
            node = node.getChild(ch)
            if node is None:
                break
            collected.update(node.getWords())
        return collected

    def getCompundWordStartingWith(self, _hash: str) -> TxtWord:
        """
        Follows the characters of the given string from the root node. If some character has no child node,
        None is returned. Otherwise the words stored at the node that was reached are scanned and the first
        one whose isPortmanteau() is true is returned.
        PARAMETERS
        ----------
        _hash : str
            String whose characters are followed from the root.
        RETURNS
        -------
        TxtWord
            A portmanteau word stored at the reached node, or None if the path does not exist or the node
            holds no such word.
        """
        node = self.__root_node
        for ch in _hash:
            node = node.getChild(ch)
            if node is None:
                return None
        # node cannot be None here: the loop returns as soon as a child is missing.
        return next((candidate for candidate in node.getWords() if candidate.isPortmanteau()), None)

nlptoolkit_dictionary-1.0.36/Dictionary/Trie/TrieNode.py ADDED Viewed

@@ -0,0 +1,75 @@
+from __future__ import annotations
+from Dictionary.Word import Word
class TrieNode:
    """
    One node of a character trie: a mapping from a character to the child node and the set of words
    stored at this node.
    """

    __children: dict[str, TrieNode]
    __words: set[Word]

    def __init__(self):
        """
        Creates a node with no children and no words.
        """
        self.__children = {}
        self.__words = set()

    def addWord(self,
                word: str,
                root: Word,
                index=0):
        """
        Stores root at the node reached by following the characters word[index:] from this node, creating
        any missing nodes on the way. When index equals the length of word, root is stored in this node.

        The descent is done with a loop rather than one recursive call per character, so the length of
        word is not limited by the interpreter's recursion depth.
        PARAMETERS
        ----------
        word : str
            String that spells the path.
        root : Word
            Word to add to the final node's word set.
        index : int
            Position in word at which the path starts; defaults to 0.
        RAISES
        ------
        IndexError
            If index is greater than the length of word.
        """
        if index > len(word):
            # Mirror plain string indexing: an index past the end is an error, not a silent no-op.
            raise IndexError("string index out of range")
        node = self
        for position in range(index, len(word)):
            ch = word[position]
            child = node.__children.get(ch)
            if child is None:
                child = TrieNode()
                node.__children[ch] = child
            node = child
        node.__words.add(root)

    def getChild(self, ch: str) -> TrieNode | None:
        """
        Looks up the child node for a character.
        PARAMETERS
        ----------
        ch : str
            Single-character key.
        RETURNS
        -------
        TrieNode
            The child stored for the character, or None if there is none.
        """
        return self.__children.get(ch)

    def getWords(self) -> set[Word]:
        """
        Returns the set of words stored at this node. The set itself is returned, not a copy.
        RETURNS
        -------
        set
            The words set.
        """
        return self.__words

nlptoolkit_dictionary-1.0.36/Dictionary/Trie/__init__.py ADDED Viewed

File without changes