LanguageStatisticsLibPy 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- languagestatisticslibpy/LanguageStatisticsFile.py +48 -40
- languagestatisticslibpy/Tetragrams.py +125 -125
- languagestatisticslibpy/Trigrams.py +125 -125
- languagestatisticslibpy/Unigrams.py +110 -110
- languagestatisticslibpy/WordTree.py +161 -161
- languagestatisticslibpy/test1.py +26 -26
- languagestatisticslibpy/test2.py +80 -80
- {languagestatisticslibpy-1.0.3.dist-info → languagestatisticslibpy-1.0.4.dist-info}/METADATA +4 -2
- languagestatisticslibpy-1.0.4.dist-info/RECORD +19 -0
- {languagestatisticslibpy-1.0.3.dist-info → languagestatisticslibpy-1.0.4.dist-info}/WHEEL +1 -1
- languagestatisticslibpy-1.0.3.dist-info/RECORD +0 -19
- {languagestatisticslibpy-1.0.3.dist-info → languagestatisticslibpy-1.0.4.dist-info/licenses}/LICENSE +0 -0
languagestatisticslibpy/test2.py
CHANGED
|
@@ -1,80 +1,80 @@
|
|
|
1
|
-
'''
|
|
2
|
-
Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
|
|
3
|
-
|
|
4
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
you may not use this file except in compliance with the License.
|
|
6
|
-
You may obtain a copy of the License at
|
|
7
|
-
|
|
8
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
|
|
10
|
-
Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
See the License for the specific language governing permissions and
|
|
14
|
-
limitations under the License.
|
|
15
|
-
|
|
16
|
-
Usage: python3 test2.py
|
|
17
|
-
test2.py just needs the package LanguageStatisticsLibPy AND all the language
|
|
18
|
-
statistics (n-grams) and dictionary files to be installed on your computer.
|
|
19
|
-
'''
|
|
20
|
-
|
|
21
|
-
from languagestatisticslibpy.LanguageStatistics import LanguageStatistics as LS
|
|
22
|
-
from datetime import datetime
|
|
23
|
-
|
|
24
|
-
# Change this path to the folder where the CrypTool-2 language statistics and
|
|
25
|
-
# dictionary files are stored, e.g. the folder "LanguageStatistics" in the standard
|
|
26
|
-
# CrypTool-2 installation folder if you have installed CrypTool 2 on Windows.
|
|
27
|
-
# Sample directory:
|
|
28
|
-
# ct2_language_statistics_folder = "C:\\Program Files\\CrypTool 2\\LanguageStatistics" # Windows
|
|
29
|
-
# ct2_language_statistics_folder = "/home/be/tmp/LanguageStatisticsLibPy_PIP-Test/LSLP/" # Linux
|
|
30
|
-
ct2_language_statistics_folder = "/Users/be/Documents/Python/LanguageStatisticsLibPy_PIP-Test/LSLP" # Mac (Note: gz file not found if path starts with ~)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
# test the cost calculation of all gram classes except hexagrams
|
|
34
|
-
for i in range(1, 6):
|
|
35
|
-
|
|
36
|
-
#write current ngram size
|
|
37
|
-
print("Grams size:", i)
|
|
38
|
-
|
|
39
|
-
#load grams for English
|
|
40
|
-
start = datetime.now()
|
|
41
|
-
grams = LS.create_grams_by_size(i, "en", ct2_language_statistics_folder, False)
|
|
42
|
-
print("\tGrams loaded in", (datetime.now() - start))
|
|
43
|
-
|
|
44
|
-
#normalize the grams
|
|
45
|
-
start = datetime.now()
|
|
46
|
-
grams.normalize(1000000.0)
|
|
47
|
-
print("\tGrams normalized in", (datetime.now() - start))
|
|
48
|
-
|
|
49
|
-
#map the text into the number space of the grams
|
|
50
|
-
numbers = LS.map_text_into_number_space("HELLOWORLDTHISISATEST", grams.alphabet)
|
|
51
|
-
|
|
52
|
-
#calculate the cost of the text
|
|
53
|
-
cost = grams.calculate_cost(numbers)
|
|
54
|
-
|
|
55
|
-
#convert the numbers back into the text space
|
|
56
|
-
text = LS.map_numbers_into_text_space(numbers, grams.alphabet)
|
|
57
|
-
print("\tText:", text)
|
|
58
|
-
|
|
59
|
-
#print the cost
|
|
60
|
-
print("\tCost value:", cost)
|
|
61
|
-
|
|
62
|
-
#Test the word tree
|
|
63
|
-
#Hint: the word tree works with strings instead of number arrays
|
|
64
|
-
print("Loading word tree")
|
|
65
|
-
start = datetime.now()
|
|
66
|
-
tree = LS.load_word_tree("en", ct2_language_statistics_folder)
|
|
67
|
-
print("\tWord tree loaded", (datetime.now() - start))
|
|
68
|
-
print("\tTotal number of words in tree", tree.stored_words)
|
|
69
|
-
|
|
70
|
-
word = "Hello"
|
|
71
|
-
print("Word:", word)
|
|
72
|
-
print("\tContains word:", tree.contains_word(word))
|
|
73
|
-
|
|
74
|
-
word = "World"
|
|
75
|
-
print("Word:", word)
|
|
76
|
-
print("\tContains word:", tree.contains_word(word))
|
|
77
|
-
|
|
78
|
-
word = "HelloWorld"
|
|
79
|
-
print("Word:", word)
|
|
80
|
-
print("\tContains word:", tree.contains_word(word))
|
|
1
|
+
'''
|
|
2
|
+
Copyright 2024 Nils Kopal, Bernhard Esslinger, CrypTool Team
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
|
|
16
|
+
Usage: python3 test2.py
|
|
17
|
+
test2.py just needs the package LanguageStatisticsLibPy AND all the language
|
|
18
|
+
statistics (n-grams) and dictionary files to be installed on your computer.
|
|
19
|
+
'''
|
|
20
|
+
|
|
21
|
+
from languagestatisticslibpy.LanguageStatistics import LanguageStatistics as LS
|
|
22
|
+
from datetime import datetime
|
|
23
|
+
|
|
24
|
+
# Change this path to the folder where the CrypTool-2 language statistics and
|
|
25
|
+
# dictionary files are stored, e.g. the folder "LanguageStatistics" in the standard
|
|
26
|
+
# CrypTool-2 installation folder if you have installed CrypTool 2 on Windows.
|
|
27
|
+
# Sample directory:
|
|
28
|
+
# ct2_language_statistics_folder = "C:\\Program Files\\CrypTool 2\\LanguageStatistics" # Windows
|
|
29
|
+
# ct2_language_statistics_folder = "/home/be/tmp/LanguageStatisticsLibPy_PIP-Test/LSLP/" # Linux
|
|
30
|
+
ct2_language_statistics_folder = "/Users/be/Documents/Python/LanguageStatisticsLibPy_PIP-Test/LSLP" # Mac (Note: gz file not found if path starts with ~)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# test the cost calculation of all gram classes except hexagrams
|
|
34
|
+
for i in range(1, 6):
|
|
35
|
+
|
|
36
|
+
#write current ngram size
|
|
37
|
+
print("Grams size:", i)
|
|
38
|
+
|
|
39
|
+
#load grams for English
|
|
40
|
+
start = datetime.now()
|
|
41
|
+
grams = LS.create_grams_by_size(i, "en", ct2_language_statistics_folder, False)
|
|
42
|
+
print("\tGrams loaded in", (datetime.now() - start))
|
|
43
|
+
|
|
44
|
+
#normalize the grams
|
|
45
|
+
start = datetime.now()
|
|
46
|
+
grams.normalize(1000000.0)
|
|
47
|
+
print("\tGrams normalized in", (datetime.now() - start))
|
|
48
|
+
|
|
49
|
+
#map the text into the number space of the grams
|
|
50
|
+
numbers = LS.map_text_into_number_space("HELLOWORLDTHISISATEST", grams.alphabet)
|
|
51
|
+
|
|
52
|
+
#calculate the cost of the text
|
|
53
|
+
cost = grams.calculate_cost(numbers)
|
|
54
|
+
|
|
55
|
+
#convert the numbers back into the text space
|
|
56
|
+
text = LS.map_numbers_into_text_space(numbers, grams.alphabet)
|
|
57
|
+
print("\tText:", text)
|
|
58
|
+
|
|
59
|
+
#print the cost
|
|
60
|
+
print("\tCost value:", cost)
|
|
61
|
+
|
|
62
|
+
#Test the word tree
|
|
63
|
+
#Hint: the word tree works with strings instead of number arrays
|
|
64
|
+
print("Loading word tree")
|
|
65
|
+
start = datetime.now()
|
|
66
|
+
tree = LS.load_word_tree("en", ct2_language_statistics_folder)
|
|
67
|
+
print("\tWord tree loaded", (datetime.now() - start))
|
|
68
|
+
print("\tTotal number of words in tree", tree.stored_words)
|
|
69
|
+
|
|
70
|
+
word = "Hello"
|
|
71
|
+
print("Word:", word)
|
|
72
|
+
print("\tContains word:", tree.contains_word(word))
|
|
73
|
+
|
|
74
|
+
word = "World"
|
|
75
|
+
print("Word:", word)
|
|
76
|
+
print("\tContains word:", tree.contains_word(word))
|
|
77
|
+
|
|
78
|
+
word = "HelloWorld"
|
|
79
|
+
print("Word:", word)
|
|
80
|
+
print("\tContains word:", tree.contains_word(word))
|
{languagestatisticslibpy-1.0.3.dist-info → languagestatisticslibpy-1.0.4.dist-info}/METADATA
RENAMED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: LanguageStatisticsLibPy
|
|
3
|
-
Version: 1.0.3
|
|
3
|
+
Version: 1.0.4
|
|
4
4
|
Summary: Quick and robust Python functions to do cryptanalysis using language statistics data for 15 languages
|
|
5
5
|
License: Apache-2.0
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Author: Nils Kopal and Bernhard Esslinger
|
|
7
8
|
Author-email: kopal@cryptool.org
|
|
8
9
|
Requires-Python: >=3.10,<4.0
|
|
@@ -13,6 +14,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
16
|
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
18
|
Project-URL: Homepage, https://github.com/CrypToolProject/LanguageStatisticsLibPy
|
|
17
19
|
Description-Content-Type: text/markdown
|
|
18
20
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
languagestatisticslibpy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
languagestatisticslibpy/Bigrams.py,sha256=LxvUp2UYqkL1G-5ReY5YG44YciyAv3kTj2IFkycsVNw,4665
|
|
3
|
+
languagestatisticslibpy/Grams.py,sha256=sirQsaLjY1osObiP1V9yDrGpcqaxgzjSU4FVyr0kvIE,4752
|
|
4
|
+
languagestatisticslibpy/GramsType.py,sha256=yleCEEKbZRFmzA3KIwXxYU8dL7Oaztm9K32WXvwL2rE,1702
|
|
5
|
+
languagestatisticslibpy/Hexagrams.py,sha256=UaQDY7dIVIOwcwo18g-mO-dZT3B6dYo4YnJrYGiN3Gw,5326
|
|
6
|
+
languagestatisticslibpy/LanguageStatistics.py,sha256=Wmd3pMaGWuPYeUojfPHl9Ero94ufuZDh_Qymm9ZvAFs,15566
|
|
7
|
+
languagestatisticslibpy/LanguageStatisticsFile.py,sha256=68mXgj9ZuNN0kHLKsWDlcnq23lzq_QEDXpKRd_tkVAs,5188
|
|
8
|
+
languagestatisticslibpy/Node.py,sha256=6Q6H7iEoIpBDpLnFBVEqZl-3LdynBOijRKRjhFaMe8w,3337
|
|
9
|
+
languagestatisticslibpy/Pentagrams.py,sha256=4VJz7L61yUR5deShv5BX-Fl1bjPPuGTeWiCcScjmV1w,5175
|
|
10
|
+
languagestatisticslibpy/test1.py,sha256=mOSw6yBBkEokBEZUrrFO-ZZobKbM-nMCBhQhA5KZjaE,1001
|
|
11
|
+
languagestatisticslibpy/test2.py,sha256=7IfF5EbeHx8wWAoerYxrRdU4gl21x8n4rpRv8oxWaQE,3109
|
|
12
|
+
languagestatisticslibpy/Tetragrams.py,sha256=YHvHON7PedaMw61-CyEcepmpQF51HRM9fnNDP2I5WJo,5123
|
|
13
|
+
languagestatisticslibpy/Trigrams.py,sha256=zYL_fOraatjZT1Ja5G-UZYWpV0kYJ4nekchQFwuvH7E,4996
|
|
14
|
+
languagestatisticslibpy/Unigrams.py,sha256=ufwP35aYuD-QR6LUWfPtZ0SyBnf8wMNs7ZFE7xKTaFw,4385
|
|
15
|
+
languagestatisticslibpy/WordTree.py,sha256=wv8KKTMfGC0NQGXR1UsH-yHYc_kMvP9dSPo0jqVtUo4,5564
|
|
16
|
+
languagestatisticslibpy-1.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
17
|
+
languagestatisticslibpy-1.0.4.dist-info/METADATA,sha256=FUBsyNtOFPeXivfZ2Dond9ppDSPd0cRYjCXOGNqBC8w,6538
|
|
18
|
+
languagestatisticslibpy-1.0.4.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
19
|
+
languagestatisticslibpy-1.0.4.dist-info/RECORD,,
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
languagestatisticslibpy/Bigrams.py,sha256=LxvUp2UYqkL1G-5ReY5YG44YciyAv3kTj2IFkycsVNw,4665
|
|
2
|
-
languagestatisticslibpy/Grams.py,sha256=sirQsaLjY1osObiP1V9yDrGpcqaxgzjSU4FVyr0kvIE,4752
|
|
3
|
-
languagestatisticslibpy/GramsType.py,sha256=yleCEEKbZRFmzA3KIwXxYU8dL7Oaztm9K32WXvwL2rE,1702
|
|
4
|
-
languagestatisticslibpy/Hexagrams.py,sha256=UaQDY7dIVIOwcwo18g-mO-dZT3B6dYo4YnJrYGiN3Gw,5326
|
|
5
|
-
languagestatisticslibpy/LanguageStatistics.py,sha256=Wmd3pMaGWuPYeUojfPHl9Ero94ufuZDh_Qymm9ZvAFs,15566
|
|
6
|
-
languagestatisticslibpy/LanguageStatisticsFile.py,sha256=2oGzXse25G4r5-8ryqY389b0E4WD1kUv7UjCAGItdGc,4449
|
|
7
|
-
languagestatisticslibpy/Node.py,sha256=6Q6H7iEoIpBDpLnFBVEqZl-3LdynBOijRKRjhFaMe8w,3337
|
|
8
|
-
languagestatisticslibpy/Pentagrams.py,sha256=4VJz7L61yUR5deShv5BX-Fl1bjPPuGTeWiCcScjmV1w,5175
|
|
9
|
-
languagestatisticslibpy/Tetragrams.py,sha256=ckl8rtqHaftKlROLvPGer4CwtNBfOar8Yp4sLssyNXE,4998
|
|
10
|
-
languagestatisticslibpy/Trigrams.py,sha256=b2CKp55Xl7VYTgIkWxlQcm2KbSh1Oknfg6fnjyS0y10,4871
|
|
11
|
-
languagestatisticslibpy/Unigrams.py,sha256=rEu4H_bsRGsWWkNRk99Oo7gHRgjSVMr3MjzuRYSwHSI,4275
|
|
12
|
-
languagestatisticslibpy/WordTree.py,sha256=hJ6damZph5kioMJe3lexWKZxxuKPRzTqOeS5l28gN-k,5403
|
|
13
|
-
languagestatisticslibpy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
languagestatisticslibpy/test1.py,sha256=q3eKGIWusia5mRWJsJUDRo34k0n1QM2LS3pKkswDIIM,975
|
|
15
|
-
languagestatisticslibpy/test2.py,sha256=BzMGXn3BecvDGoM7sKev9eeNsOCdYa90qOy7l9GZ1SA,3029
|
|
16
|
-
languagestatisticslibpy-1.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
17
|
-
languagestatisticslibpy-1.0.3.dist-info/METADATA,sha256=mE6blewaJgNNmymP-d4SsxREy8st80Fl92hqDMXEk40,6465
|
|
18
|
-
languagestatisticslibpy-1.0.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
19
|
-
languagestatisticslibpy-1.0.3.dist-info/RECORD,,
|
{languagestatisticslibpy-1.0.3.dist-info → languagestatisticslibpy-1.0.4.dist-info/licenses}/LICENSE
RENAMED
|
File without changes
|