LanguageStatisticsLibPy 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ '''
2
+ Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ '''
16
+ import numpy as np
17
+ import os
18
+ from languagestatisticslibpy.Grams import Grams
19
+ from languagestatisticslibpy.GramsType import GramsType
20
+ from languagestatisticslibpy.LanguageStatisticsFile import LanguageStatisticsFile
21
+
22
class Tetragrams(Grams):
    """Tetragram (4-gram) statistics of a language, used to score how closely a
    numeric text resembles natural language text."""

    def __init__(self, language, language_statistics_directory, use_spaces=False):
        """
        Initializes the Tetragrams class by calling the parent class (Grams) initializer.

        Parameters:
        - language (str): The language of the tetragram statistics.
        - language_statistics_directory (str): Path to the directory containing language statistics files.
        - use_spaces (bool): Whether to include spaces in the analysis (default: False).
        """
        super().__init__(language, language_statistics_directory, use_spaces)

    def load_gz(self, filename, language_statistics_directory):
        """
        Loads a gzip-compressed file containing tetragram frequencies.

        Parameters:
        - filename (str): The name of the file to load.
        - language_statistics_directory (str): The directory where the statistics file is located.

        Sets:
        - self.frequencies (np.ndarray): A 4D array of tetragram frequencies.
        - self.alphabet (list): The alphabet used in the statistics file.
        - self.max_value (float): The maximum value in the frequencies array, or -inf if the array is empty.
        """
        file_path = os.path.join(language_statistics_directory, filename)
        language_statistics_file = LanguageStatisticsFile(file_path)
        self.frequencies = language_statistics_file.load_frequencies(4)
        self.alphabet = language_statistics_file.alphabet
        self.max_value = np.max(self.frequencies) if self.frequencies.size > 0 else float('-inf')

    def calculate_cost(self, text):
        """
        Calculates the cost of a given text based on tetragram frequencies.

        Parameters:
        - text (sequence of int): The text, already mapped into number space.

        Returns:
        - float: The average cost of tetragrams in the text. Returns 0.0 if the text length is less than 4.

        Notes:
        - Skips tetragrams containing symbols outside the defined alphabet.
        - If `add_letter_indices` is defined, the symbol indices are shifted before the lookup.
        """
        if len(text) < 4:
            return 0.0

        value = 0.0
        alphabet_length = len(self.alphabet)
        end = len(text) - 3

        for i in range(end):
            # Index directly instead of slice-unpacking: avoids creating a new
            # 4-element sequence on every iteration of this hot loop.
            a = text[i]
            b = text[i + 1]
            c = text[i + 2]
            d = text[i + 3]

            if self.add_letter_indices:
                a += self.add_letter_indices.get(a, 0)
                b += self.add_letter_indices.get(b, 0)
                c += self.add_letter_indices.get(c, 0)
                d += self.add_letter_indices.get(d, 0)

            if 0 <= a < alphabet_length and 0 <= b < alphabet_length and \
               0 <= c < alphabet_length and 0 <= d < alphabet_length:
                value += self.frequencies[a, b, c, d]

        return value / end

    def gram_size(self):
        """
        Returns the size of the grams being analyzed (tetragrams in this case).

        Returns:
        - int: The size of the grams (always 4 for tetragrams).
        """
        return 4

    def grams_type(self):
        """
        Returns the type of grams being analyzed.

        Returns:
        - GramsType: An enum value representing the type of grams (GramsType.Tetragrams).
        """
        return GramsType.Tetragrams

    def normalize(self, max_value):
        """
        Normalizes the tetragram frequencies based on the provided maximum value.

        Parameters:
        - max_value (float): The maximum value used for normalization.

        Notes:
        - Adjusts all frequencies proportionally to the new maximum value.
        - Updates `self.max_value` to the new maximum after normalization.
        """
        super().normalize(max_value)
        adjust_value = self.max_value * max_value
        # Vectorized replacement of the original O(|alphabet|^4) nested Python
        # loops: numpy performs the identical elementwise division
        # adjust_value / frequencies in a single C-level pass.
        self.frequencies = adjust_value / self.frequencies
        self.max_value = np.max(self.frequencies) if self.frequencies.size > 0 else float('-inf')
@@ -0,0 +1,125 @@
1
+ '''
2
+ Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ '''
16
+ import numpy as np
17
+ import os
18
+ from languagestatisticslibpy.Grams import Grams
19
+ from languagestatisticslibpy.GramsType import GramsType
20
+ from languagestatisticslibpy.LanguageStatisticsFile import LanguageStatisticsFile
21
+
22
class Trigrams(Grams):
    """Trigram (3-gram) statistics of a language, used to score how closely a
    numeric text resembles natural language text."""

    def __init__(self, language, language_statistics_directory, use_spaces=False):
        """
        Initializes the Trigrams class by calling the parent class (Grams) initializer.

        Parameters:
        - language (str): The language of the trigram statistics.
        - language_statistics_directory (str): Path to the directory containing language statistics files.
        - use_spaces (bool): Whether to include spaces in the analysis (default: False).
        """
        super().__init__(language, language_statistics_directory, use_spaces)

    def load_gz(self, filename, language_statistics_directory):
        """
        Loads a gzip-compressed file containing trigram frequencies.

        Parameters:
        - filename (str): The name of the file to load.
        - language_statistics_directory (str): The directory where the statistics file is located.

        Sets:
        - self.frequencies (np.ndarray): A 3D array of trigram frequencies.
        - self.alphabet (list): The alphabet used in the statistics file.
        - self.max_value (float): The maximum value in the frequencies array, or -inf if the array is empty.
        """
        file_path = os.path.join(language_statistics_directory, filename)
        language_statistics_file = LanguageStatisticsFile(file_path)
        self.frequencies = language_statistics_file.load_frequencies(3)
        self.alphabet = language_statistics_file.alphabet
        self.max_value = np.max(self.frequencies) if self.frequencies.size > 0 else float('-inf')

    def calculate_cost(self, text):
        """
        Calculates the cost of a given text based on trigram frequencies.

        Parameters:
        - text (sequence of int): The text, already mapped into number space.

        Returns:
        - float: The average cost of trigrams in the text. Returns 0.0 if the text length is less than 3.

        Notes:
        - Skips trigrams containing symbols outside the defined alphabet.
        - If `add_letter_indices` is defined, the symbol indices are shifted before the lookup.
        """
        if len(text) < 3:
            # Float for consistency with the Tetragrams implementation.
            return 0.0

        value = 0.0
        alphabet_length = len(self.alphabet)
        end = len(text) - 2

        for i in range(end):
            a = text[i]
            b = text[i + 1]
            c = text[i + 2]

            if self.add_letter_indices:
                a += self.add_letter_indices.get(a, 0)
                b += self.add_letter_indices.get(b, 0)
                c += self.add_letter_indices.get(c, 0)

            # Chained-comparison bounds check, same style as Tetragrams.
            if 0 <= a < alphabet_length and 0 <= b < alphabet_length and \
               0 <= c < alphabet_length:
                value += self.frequencies[a, b, c]

        return value / end

    def gram_size(self):
        """
        Returns the size of the grams being analyzed (trigrams in this case).

        Returns:
        - int: The size of the grams (always 3 for trigrams).
        """
        return 3

    def grams_type(self):
        """
        Returns the type of grams being analyzed.

        Returns:
        - GramsType: An enum value representing the type of grams (GramsType.Trigrams).
        """
        return GramsType.Trigrams

    def normalize(self, max_value):
        """
        Normalizes the trigram frequencies based on the provided maximum value.

        Parameters:
        - max_value (float): The maximum value used for normalization.

        Notes:
        - Adjusts all frequencies proportionally to the new maximum value.
        - Updates `self.max_value` to the new maximum after normalization.
        """
        super().normalize(max_value)
        adjust_value = self.max_value * max_value
        # Vectorized replacement of the original O(|alphabet|^3) nested Python
        # loops: numpy performs the identical elementwise division in one pass.
        self.frequencies = adjust_value / self.frequencies
        self.max_value = np.max(self.frequencies) if self.frequencies.size > 0 else float('-inf')
@@ -0,0 +1,110 @@
1
+ '''
2
+ Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ '''
16
+ import numpy as np
17
+ import os
18
+ from languagestatisticslibpy.Grams import Grams
19
+ from languagestatisticslibpy.GramsType import GramsType
20
+ from languagestatisticslibpy.LanguageStatisticsFile import LanguageStatisticsFile
21
+
22
class Unigrams(Grams):
    """Unigram (single-symbol) statistics of a language, used to score how
    closely a numeric text resembles natural language text."""

    def __init__(self, language, language_statistics_directory, use_spaces=False):
        """
        Initializes the Unigrams class by calling the parent class (Grams) initializer.

        Parameters:
        - language (str): The language of the unigram statistics.
        - language_statistics_directory (str): Path to the directory containing language statistics files.
        - use_spaces (bool): Whether to include spaces in the analysis (default: False).
        """
        super().__init__(language, language_statistics_directory, use_spaces)

    def load_gz(self, filename, language_statistics_directory):
        """
        Loads a gzip-compressed file containing unigram frequencies.

        Parameters:
        - filename (str): The name of the file to load.
        - language_statistics_directory (str): The directory where the statistics file is located.

        Sets:
        - self.frequencies (np.ndarray): A 1D array of unigram frequencies.
        - self.alphabet (list): The alphabet used in the statistics file.
        - self.max_value (float): The maximum value in the frequencies array, or -inf if the array is empty.
        """
        file_path = os.path.join(language_statistics_directory, filename)
        language_statistics_file = LanguageStatisticsFile(file_path)
        self.frequencies = language_statistics_file.load_frequencies(1)
        self.alphabet = language_statistics_file.alphabet
        self.max_value = np.max(self.frequencies) if self.frequencies.size > 0 else float('-inf')

    def calculate_cost(self, text):
        """
        Calculates the cost of a given text based on unigram frequencies.

        Parameters:
        - text (sequence of int): The text, already mapped into number space.

        Returns:
        - float: The average cost of unigrams in the text. Returns 0.0 if the text is empty.

        Notes:
        - Skips symbols that are outside the defined alphabet.
        - If `add_letter_indices` is defined, the symbol index is shifted before the lookup.
        """
        if len(text) == 0:
            return 0.0

        value = 0.0
        alphabet_length = len(self.alphabet)  # hoisted out of the loop
        for symbol in text:
            if self.add_letter_indices:
                symbol += self.add_letter_indices.get(symbol, 0)
            if 0 <= symbol < alphabet_length:
                value += self.frequencies[symbol]
        return value / len(text)

    def gram_size(self):
        """
        Returns the size of the grams being analyzed (unigrams in this case).

        Returns:
        - int: The size of the grams (always 1 for unigrams).
        """
        return 1

    def grams_type(self):
        """
        Returns the type of grams being analyzed.

        Returns:
        - GramsType: An enum value representing the type of grams (GramsType.Unigrams).
        """
        return GramsType.Unigrams

    def normalize(self, max_value):
        """
        Normalizes the unigram frequencies based on the provided maximum value.

        Parameters:
        - max_value (float): The maximum value used for normalization.

        Notes:
        - Adjusts all frequencies proportionally to the new maximum value.
        - Updates `self.max_value` to the new maximum after normalization.
        """
        super().normalize(max_value)
        adjust_value = self.max_value * max_value
        # Vectorized replacement of the original per-element Python loop.
        self.frequencies = adjust_value / self.frequencies
        # Bug fix: the original omitted this update even though its docstring
        # promised it, and the Trigrams/Tetragrams siblings both perform it.
        self.max_value = np.max(self.frequencies) if self.frequencies.size > 0 else float('-inf')
@@ -0,0 +1,162 @@
1
+ '''
2
+ Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ '''
16
+ from io import BufferedReader
17
+ from collections import deque
18
+ from languagestatisticslibpy.Node import Node
19
+
20
class WordTree(Node):
    """
    Represents a tree data structure for storing words and efficiently querying them.

    Inherits:
    - Node: The base class for tree nodes, where each node represents a character.

    Attributes:
    - stored_words (int): The number of words stored in the tree.
    - language_code (str): The language code for the words stored in the tree.
    - alphabet (str): The alphabet used in the stored words.
    """

    def __init__(self):
        """
        Initializes an empty WordTree.

        Initializes:
        - stored_words (int): Set to 0, as no words are initially stored.
        - language_code (str): Empty, to be set during deserialization.
        - alphabet (str): Empty, to be set during deserialization.
        """
        super().__init__()
        self.stored_words = 0
        self.language_code = ''
        self.alphabet = ''

    @staticmethod
    def deserialize(reader: BufferedReader):
        """
        Deserializes a WordTree from a binary file.

        Parameters:
        - reader (BufferedReader): A binary file reader containing the serialized WordTree.

        Returns:
        - WordTree: The deserialized WordTree object.

        Raises:
        - Exception: If the file format is invalid or the magic number does not match.

        Process:
        1. Reads the file header and validates the magic number.
        2. Reads the language code and alphabet (NUL-terminated strings).
        3. Reads the number of stored words.
        4. Constructs the WordTree structure by iterating through the file's serialized data.

        NOTE(review): the byte-at-a-time .decode('utf-8') calls below can only
        handle single-byte (ASCII) characters; a multi-byte UTF-8 character in
        the file would raise UnicodeDecodeError — TODO confirm the CT2DIC
        format is ASCII-only.
        """
        tree = WordTree()

        # Load word tree header
        magic_no = reader.read(6).decode('utf-8')
        if magic_no != "CT2DIC":
            raise Exception("File does not start with the expected magic number for word tree.")

        # Read NUL-terminated language code
        tree.language_code = ''
        char = reader.read(1).decode('utf-8')
        while char != '\0':
            tree.language_code += char
            char = reader.read(1).decode('utf-8')

        # Read NUL-terminated alphabet
        tree.alphabet = ''
        char = reader.read(1).decode('utf-8')
        while char != '\0':
            tree.alphabet += char
            char = reader.read(1).decode('utf-8')

        # Read number of stored words (little-endian uint32)
        tree.stored_words = int.from_bytes(reader.read(4), 'little')

        # Load word tree data structure.
        # NOTE(review): stored_words is read from the header above AND
        # incremented once per word-end marker below, which looks like it
        # double-counts — TODO verify against the CT2 serialization format.
        stack = deque([tree])
        byte = reader.read(1)
        while byte:
            char = byte.decode('utf-8')
            if char == Node.WordEndSymbol:
                stack[-1].word_ends_here = True
                tree.stored_words += 1
            elif char == Node.TerminationSymbol:
                stack.pop()
            else:
                new_node = Node(char)
                stack[-1].child_nodes.append(new_node)
                stack.append(new_node)
            byte = reader.read(1)

        return tree

    def contains_word(self, word):
        """
        Checks whether a given word exists in the WordTree.

        Parameters:
        - word (str): The word to search for (matched case-insensitively).

        Returns:
        - bool: True if the word is stored in the tree, False otherwise.

        Process:
        1. Converts the word to uppercase for case-insensitive comparison.
        2. Traverses the tree to find the sequence of characters in the word.
        3. Returns False if any character is missing in the tree structure.
        """
        word = word.upper()
        current_node = self
        for char in word:
            for child_node in current_node.child_nodes:
                if child_node.value == char:
                    current_node = child_node
                    break
            else:
                # No child matched this character.
                return False
        # Bug fix: the original returned True for any *prefix* of a stored
        # word; a word is only contained if a word actually ends at this node.
        return current_node.word_ends_here

    def to_list(self):
        """
        Converts all words stored in the WordTree into a list.

        Returns:
        - list: A list of all words stored in the tree.

        Process:
        1. Traverses the tree depth-first, maintaining the current character prefix.
        2. Adds a word to the list whenever a node marks the end of a word.
        """
        list_of_words = []
        prefix = []

        def collect(node):
            # Append/pop around the recursion instead of copying the whole
            # prefix for every child (the original copied the deque per call).
            prefix.append(node.value)
            if node.word_ends_here:
                list_of_words.append(''.join(prefix))
            for child_node in node.child_nodes:
                collect(child_node)
            prefix.pop()

        for node in self.child_nodes:
            collect(node)

        return list_of_words
File without changes
@@ -0,0 +1,26 @@
1
+ '''
2
+ Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Usage: python3 test1.py
17
+ test1.py is a minimal working example (MWE) which just needs the
18
+ package LanguageStatisticsLibPy to be installed.
19
+ '''
20
+
21
+ from languagestatisticslibpy.LanguageStatistics import LanguageStatistics as LS
22
+
23
# Map the sample text into the number space of the English alphabet, then
# compute and print its index of coincidence (IoC).
numbers = LS.map_text_into_number_space("HELLOWORD", LS.alphabets['en'])
ioc = LS.calculate_ioc(numbers)

print(ioc)
@@ -0,0 +1,80 @@
1
+ '''
2
+ Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+
16
+ Usage: python3 test2.py
17
+ test2.py just needs the package LanguageStatisticsLibPy AND all the language
18
+ statistics (n-grams) and dictionary files to be installed on your computer.
19
+ '''
20
+
21
+ from languagestatisticslibpy.LanguageStatistics import LanguageStatistics as LS
22
+ from datetime import datetime
23
+
24
# Change this path to the folder where the CrypTool-2 language statistics and
# dictionary files are stored, e.g. the folder "LanguageStatistics" in the standard
# CrypTool-2 installation folder if you have installed CrypTool 2 on Windows.
# Sample directory:
# ct2_language_statistics_folder = "C:\\Program Files\\CrypTool 2\\LanguageStatistics" # Windows
# ct2_language_statistics_folder = "/home/be/tmp/LanguageStatisticsLibPy_PIP-Test/LSLP/" # Linux
ct2_language_statistics_folder = "/Users/be/Documents/Python/LanguageStatisticsLibPy_PIP-Test/LSLP" # Mac (Note: gz file not found if path starts with ~)


# Exercise the cost calculation of every gram class except hexagrams.
for size in range(1, 6):

    # Report which n-gram size is being tested.
    print("Grams size:", size)

    # Load the English statistics for this gram size and time the load.
    t0 = datetime.now()
    grams = LS.create_grams_by_size(size, "en", ct2_language_statistics_folder, False)
    print("\tGrams loaded in", (datetime.now() - t0))

    # Normalize the grams and time the normalization.
    t0 = datetime.now()
    grams.normalize(1000000.0)
    print("\tGrams normalized in", (datetime.now() - t0))

    # Map a sample text into the number space of the grams, score it,
    # and map it back for display.
    numbers = LS.map_text_into_number_space("HELLOWORLDTHISISATEST", grams.alphabet)
    cost = grams.calculate_cost(numbers)
    text = LS.map_numbers_into_text_space(numbers, grams.alphabet)
    print("\tText:", text)
    print("\tCost value:", cost)

# Test the word tree.
# Hint: the word tree works with strings instead of number arrays.
print("Loading word tree")
t0 = datetime.now()
tree = LS.load_word_tree("en", ct2_language_statistics_folder)
print("\tWord tree loaded", (datetime.now() - t0))
print("\tTotal number of words in tree", tree.stored_words)

# Look up a few candidate words in the tree.
for word in ("Hello", "World", "HelloWorld"):
    print("Word:", word)
    print("\tContains word:", tree.contains_word(word))