noshot 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries. Since noshot 0.1.0 is an initial release, every file below is an addition (`+N -0`).
- noshot/__init__.py +1 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +112 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +111 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +115 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +123 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/2_ANOVA.csv +769 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +126 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Two Way ANOVA.ipynb +138 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/reaction_time.csv +5 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sample_data.csv +16 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sleep_deprivation.csv +4 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/3_Linear.csv +4802 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +113 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression.ipynb +148 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/house_rate.csv +22 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +128 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression.ipynb +145 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/default.csv +1001 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/hours_scores_records.csv +101 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +256 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +157 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +178 -0
- noshot/data/AIDS CN NLP/AIDS/3. Genetic Algorithm/Genetic.ipynb +95 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +74 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +103 -0
- noshot/data/AIDS CN NLP/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +182 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +120 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +125 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/Random Sampling.ipynb +73 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test Hash Function.ipynb +141 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test.ipynb +151 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/1_heart.csv +304 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Independent T Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Paired T Test.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test Hash Function.ipynb +142 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test.ipynb +158 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/iq_test.csv +0 -0
- noshot/data/AIDS CN NLP/AIDS/Others (AllinOne)/All In One.ipynb +4581 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/chat.java +81 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +65 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +44 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +229 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/file_to_send.txt +2 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/filetransfer.java +119 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/rmi.java +56 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.awk +25 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.tcl +81 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.awk +27 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.tcl +153 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +28 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +78 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +79 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +163 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/DV.tcl +111 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/LS.tcl +106 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/analysis.awk +36 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/analysis.awk +20 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/broadcast.tcl +76 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/multicast.tcl +103 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/DHCP.java +125 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/procedure.png +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/1-Prereqs.py +18 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-Chi2test.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-T-test.py +79 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/3-WSD-nb.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/4-Hindle-Rooth.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/5-HMM-Trellis.py +82 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/6-HMM-Viterbi.py +16 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/7-PCFG-parsetree.py +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Chi2test.ipynb +285 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Hindle-Rooth.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/PCFG.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Prereqs.ipynb +131 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/T test.ipynb +252 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/TFIDF BOW.ipynb +171 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Trellis.ipynb +244 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/WSD.ipynb +645 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Word2Vec.ipynb +93 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +370 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +274 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +905 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/test.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +272 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +549 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +817 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +231 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +507 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +255 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +159 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +282 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +670 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +613 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +74 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +480 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +445 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +105 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +87 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +11 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +201 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backward-Procedure.ipynb +597 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Bag_of.ipynb +1422 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/CYK-algorithm.ipynb +1067 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Forward-Procedure.ipynb +477 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/LSTM.ipynb +1290 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-4.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-5.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/abc.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-1-nltk.ipynb +711 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-2-nlp.ipynb +267 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/exp8&9.ipynb +305 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/hind.ipynb +287 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/lab66.ipynb +752 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/leb_3.ipynb +612 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_1.ipynb +3008 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_2.ipynb +3095 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlplab-9.ipynb +295 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nltk-ex-4.ipynb +506 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text1.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text2.txt +8 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text3.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/translation-rnn.ipynb +812 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/word2vector.ipynb +173 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Backward Procedure Algorithm.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Chi Square Collocation.ipynb +208 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Collocation (T test).ipynb +188 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Experiment 1.ipynb +437 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Forward Procedure Algorithm.ipynb +132 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Hindle Rooth.ipynb +414 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/MachineTranslation.ipynb +368 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +86 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +112 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/PCFG Inside Probability.ipynb +451 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Text Generation using LSTM.ipynb +297 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Viterbi.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Word Sense Disambiguation.ipynb +335 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/10.Text Generation using LSTM.ipynb +316 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/11.Machine Translation.ipynb +868 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/2.T and Chi2 Test.ipynb +204 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +234 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/4.Hinddle and Rooth.ipynb +128 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/5.Forward and Backward.ipynb +149 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/6.Viterbi.ipynb +111 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG Parse Tree.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG using cyk.ipynb +101 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/9.Word2Vector.ipynb +78 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/NLP ALL In One.ipynb +2619 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample1.txt +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample2.txt +4 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/word2vec_model.bin +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +312 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/3. Naive Bayes WSD.ipynb +199 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/4. Hinddle and Rooth.ipynb +151 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +164 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/7. PCFG using CYK.ipynb +383 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/8. BOW and TF-IDF.ipynb +252 -0
- noshot/data/AIDS CN NLP/Ubuntu CN Lab.iso +0 -0
- noshot/main.py +47 -0
- noshot-0.1.0.dist-info/LICENSE.txt +21 -0
- noshot-0.1.0.dist-info/METADATA +65 -0
- noshot-0.1.0.dist-info/RECORD +210 -0
- noshot-0.1.0.dist-info/WHEEL +5 -0
- noshot-0.1.0.dist-info/top_level.txt +1 -0
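Everything under `noshot/data/` ships inside the wheel as package data. This diff does not show how `noshot/main.py` (47 lines) exposes it, so the accessor below is only a minimal sketch using the standard library, assuming nothing beyond the layout listed above:

```python
from importlib.resources import files

# Walk the bundled data tree (layout as listed in this diff). The accessor
# actually provided by noshot/main.py is not shown here and may differ.
data_root = files("noshot") / "data" / "AIDS CN NLP"
for entry in (data_root / "NLP" / "NLP 3").iterdir():
    print(entry.name)
```

The two hunks below reproduce two of the added notebooks in full.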
@@ -0,0 +1,267 @@
(New file: the hunk's +267 line count matches `noshot/data/AIDS CN NLP/NLP/NLP 3/ex-2-nlp.ipynb` in the manifest above. The raw diff adds the notebook's JSON line by line; the recoverable content — two executed code cells, their outputs, and the kernel metadata — is reproduced below.)

Cell 1 ranks bigram collocations by mean probability, where a bigram's μ-value is its count divided by the total number of bigrams in the corpus:

```python
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from tabulate import tabulate

def preprocess_text(text: str) -> list[str]:
    """Preprocesses the text by tokenizing, removing punctuation and stopwords."""
    words = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    return [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words]

def calculate_mean_probability(bigram_freq: FreqDist, total_bigrams: int) -> dict:
    """Calculates the mean probability (μ-value) of each bigram."""
    mean_probabilities = {bigram: freq / total_bigrams for bigram, freq in bigram_freq.items()}
    return mean_probabilities

def main():
    # Download required NLTK data
    # nltk.download('punkt')
    # nltk.download('stopwords')

    # Load text
    with open('text3.txt', 'r') as file:
        text = file.read()

    # Preprocess text
    words = preprocess_text(text)

    # Calculate word frequency distribution
    fdist = FreqDist(words)  # computed but unused in this cell

    # Calculate bigrams and their frequencies
    bigrams = list(nltk.bigrams(words))
    bigram_freq = FreqDist(bigrams)

    # Calculate mean probability (μ-value) for each bigram
    mean_probabilities = calculate_mean_probability(bigram_freq, len(bigrams))

    # Sort collocations by mean probability
    collocations = sorted(mean_probabilities.items(), key=lambda x: x[1], reverse=True)

    '''# Calculate mean of bigram frequencies
    total_bigram_freq = sum(bigram_freq.values())
    mean_bigram_freq = total_bigram_freq / len(bigram_freq)

    # Print mean bigram frequency
    print(f"Mean Bigram Frequency: {mean_bigram_freq:.2f}\n")'''

    # Print top N collocations with their frequencies and mean probabilities
    N = 10
    headers = ["Rank", "Bigram", "Frequency", "Mean Probability (μ-value)"]
    table = []
    for i, (bigram, mean_prob) in enumerate(collocations[:N]):
        table.append([i+1, bigram, bigram_freq[bigram], f"{mean_prob:.6f}"])
    print(tabulate(table, headers, tablefmt="orgtbl"))

if __name__ == "__main__":
    main()
```

Cell 1 output:

```text
| Rank | Bigram                      | Frequency | Mean Probability (μ-value) |
|------+-----------------------------+-----------+----------------------------|
|    1 | ('data', 'science')         |        15 |                   0.020243 |
|    2 | ('data', 'processing')      |         7 |                   0.009447 |
|    3 | ('predictive', 'analytics') |         5 |                   0.006748 |
|    4 | ('data', 'visualization')   |         5 |                   0.006748 |
|    5 | ('ai', 'data')              |         4 |                   0.005398 |
|    6 | ('ai', 'algorithms')        |         4 |                   0.005398 |
|    7 | ('data', 'cleaning')        |         4 |                   0.005398 |
|    8 | ('cleaning', 'preparation') |         4 |                   0.005398 |
|    9 | ('natural', 'language')     |         4 |                   0.005398 |
|   10 | ('language', 'processing')  |         4 |                   0.005398 |
```

Cell 2 repeats the same pipeline and adds two significance tests per bigram: a 2×2 chi-square contingency test (`scipy.stats.chi2_contingency`) and a one-sample t-test (`scipy.stats.ttest_1samp`) against the expected frequency under independence. It also enables the mean-bigram-frequency block that Cell 1 keeps commented out:

```python
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from tabulate import tabulate
from scipy.stats import chi2_contingency, ttest_1samp
import numpy as np

def preprocess_text(text: str) -> list[str]:
    """Preprocesses the text by tokenizing, removing punctuation and stopwords."""
    words = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    return [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words]

def calculate_mean_probability(bigram_freq: FreqDist, total_bigrams: int) -> dict:
    """Calculates the mean probability (μ-value) of each bigram."""
    mean_probabilities = {bigram: freq / total_bigrams for bigram, freq in bigram_freq.items()}
    return mean_probabilities

def perform_statistical_tests(bigram_freq: FreqDist, word_freq: FreqDist, total_bigrams: int):
    """Perform t-test and chi-square test for each bigram."""
    results = []

    for bigram, observed_freq in bigram_freq.items():
        word1, word2 = bigram
        freq_w1 = word_freq.get(word1, 0)
        freq_w2 = word_freq.get(word2, 0)

        # Expected frequency for the bigram assuming independence
        expected_freq = (freq_w1 * freq_w2) / total_bigrams

        # Chi-square test
        observed = np.array([
            [observed_freq, freq_w1 - observed_freq],
            [freq_w2 - observed_freq, total_bigrams - (freq_w1 + freq_w2 - observed_freq)]
        ])

        try:
            chi2_stat, p_value_chi2, dof, ex = chi2_contingency(observed)
        except ValueError:
            chi2_stat, p_value_chi2 = np.nan, np.nan

        # Generate sample data to perform t-test
        sample_data = [observed_freq] * observed_freq + [expected_freq] * (total_bigrams - observed_freq)

        # Perform one-sample t-test
        t_stat, p_value_t = ttest_1samp(sample_data, expected_freq)

        results.append((bigram, observed_freq, t_stat, p_value_t, chi2_stat, p_value_chi2))

    return results

def main():
    # Download required NLTK data
    # nltk.download('punkt')
    # nltk.download('stopwords')

    # Load text
    with open("text3.txt", 'r') as file:
        text = file.read()

    # Preprocess text
    words = preprocess_text(text)

    # Calculate word frequency distribution
    word_freq = FreqDist(words)

    # Calculate bigrams and their frequencies
    bigrams = list(nltk.bigrams(words))
    bigram_freq = FreqDist(bigrams)

    # Calculate mean probability (μ-value) for each bigram
    mean_probabilities = calculate_mean_probability(bigram_freq, len(bigrams))

    # Sort collocations by mean probability
    collocations = sorted(mean_probabilities.items(), key=lambda x: x[1], reverse=True)

    # Calculate mean of bigram frequencies
    total_bigram_freq = sum(bigram_freq.values())
    mean_bigram_freq = total_bigram_freq / len(bigram_freq)

    # Print mean bigram frequency
    print(f"Mean Bigram Frequency: {mean_bigram_freq:.2f}\n")

    # Perform statistical tests for each bigram
    results = perform_statistical_tests(bigram_freq, word_freq, len(bigrams))

    # Print top N collocations with their frequencies and mean probabilities
    N = 10
    headers = ["Rank", "Bigram", "Frequency", "Mean Prob(μ)", "t-Statistic", "p-Value(t-Test)", "Chi Square", "p-Value(Chi-Square)"]
    table = []
    for i, (bigram, observed_freq, t_stat, p_value_t, chi2_stat, p_value_chi2) in enumerate(results[:N]):
        table.append([
            i + 1,
            bigram,
            observed_freq,
            f"{mean_probabilities.get(bigram, 0):.6f}",
            f"{t_stat:.4f}" if not np.isnan(t_stat) else "NaN",
            f"{p_value_t:.4f}" if not np.isnan(p_value_t) else "NaN",
            f"{chi2_stat:.4f}" if not np.isnan(chi2_stat) else "NaN",
            f"{p_value_chi2:.4f}" if not np.isnan(p_value_chi2) else "NaN"
        ])
    print(tabulate(table, headers, tablefmt="grid"))

if __name__ == "__main__":
    main()
```

Cell 2 output (grid borders condensed to a pipe table):

```text
Mean Bigram Frequency: 1.17

| Rank | Bigram                         | Frequency | Mean Prob(μ) | t-Statistic | p-Value(t-Test) | Chi Square | p-Value(Chi-Square) |
|------+--------------------------------+-----------+--------------+-------------+-----------------+------------+---------------------|
|    1 | ('impact', 'artificial')       |         2 |     0.002699 |      1.4152 |          0.1574 |    183.747 |              0      |
|    2 | ('artificial', 'intelligence') |         3 |     0.004049 |      1.7344 |          0.0833 |    307.077 |              0      |
|    3 | ('intelligence', 'data')       |         3 |     0.004049 |      1.7344 |          0.0833 |    12.4097 |              0.0004 |
|    4 | ('data', 'science')            |        15 |     0.020243 |      3.9101 |          0.0001 |    167.483 |              0      |
|    5 | ('science', 'introduction')    |         1 |     0.00135  |      1      |          0.3176 |    11.6208 |              0.0007 |
|    6 | ('introduction', 'recent')     |         1 |     0.00135  |      1      |          0.3176 |    184.75  |              0      |
|    7 | ('recent', 'years')            |         1 |     0.00135  |      1      |          0.3176 |    184.75  |              0      |
|    8 | ('years', 'convergence')       |         1 |     0.00135  |      1      |          0.3176 |    184.75  |              0      |
|    9 | ('convergence', 'artificial')  |         1 |     0.00135  |      1      |          0.3176 |    61.0835 |              0      |
|   10 | ('intelligence', 'ai')         |         1 |     0.00135  |      1      |          0.3176 |     0.4959 |              0.4813 |
```

Note that Cell 2 prints the first ten bigrams in corpus order (`results[:N]`); the sorted `collocations` list is computed but never used, which is why ('data', 'science') appears at rank 4 despite having the highest frequency.

A trailing empty code cell closes the notebook. Metadata: Python 3 (ipykernel) kernel, Python 3.11.7, nbformat 4 (minor 5).
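For cross-checking, NLTK ships ready-made bigram association measures that play the role of the notebook's hand-computed t-Statistic and Chi Square columns. A minimal sketch, assuming the same `text3.txt` corpus and downloaded NLTK data; NLTK's formulas differ in detail from the notebook's synthetic-sample t-test, so the scores will not match exactly:

```python
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder

with open('text3.txt') as f:  # same corpus file the notebook reads
    text = f.read()

stop_words = set(stopwords.words('english'))
words = [w.lower() for w in word_tokenize(text)
         if w.isalnum() and w.lower() not in stop_words]

bigram_measures = BigramAssocMeasures()
finder = BigramCollocationFinder.from_words(words)

# Top 10 collocations by Student's t and by chi-square.
print(finder.nbest(bigram_measures.student_t, 10))
print(finder.nbest(bigram_measures.chi_sq, 10))
```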
@@ -0,0 +1,305 @@
(New file: the hunk's +305 line count matches `noshot/data/AIDS CN NLP/NLP/NLP 3/exp8&9.ipynb` in the manifest above. One executed code cell plus a trailing empty cell; the recoverable content is reproduced below.)

The cell preprocesses `text2.txt` (tokenize, drop stopwords and punctuation, lemmatize, stem), trains a CBOW Word2Vec model on the resulting tokens, prints the pairwise cosine-similarity matrix of the full vocabulary, then averages the token embeddings into a single document vector and fits a small Keras binary classifier on it:

```python
# Import required libraries
import nltk
import numpy as np
import pandas as pd
import re
import seaborn as sns
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
import tensorflow as tf
import string

# Download required NLTK resources (uncomment if not already downloaded)
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')

# Initialize the stopwords, lemmatizer, and stemmer
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

# Function to preprocess text data (Tokenization, Lemmatization, Stemming, Stopwords Removal)
def preprocess_text(text):
    # Tokenize the text
    tokens = word_tokenize(text.lower())

    # Remove punctuation and stopwords, and perform lemmatization and stemming
    processed_tokens = []
    for word in tokens:
        if word not in stop_words and word not in string.punctuation:
            lemmatized_word = lemmatizer.lemmatize(word)  # Lemmatization
            stemmed_word = stemmer.stem(lemmatized_word)  # Stemming
            processed_tokens.append(stemmed_word)

    return processed_tokens

# Read the text data file
file_path = 'text2.txt'  # You can change this to your actual file path
with open(file_path, 'r', encoding='utf-8') as file:
    text_data = file.read()

# Preprocess the text data
tokens = preprocess_text(text_data)

# ============================
# WORD2VEC PART
# ============================

# Train Word2Vec model
word2vec_model = Word2Vec(sentences=[tokens], vector_size=100, window=5, min_count=1, sg=0)  # sg=0 for CBOW

# Get the list of all unique words in the vocabulary
vocab = list(word2vec_model.wv.index_to_key)

# Initialize a matrix to store similarity scores
similarity_matrix = np.zeros((len(vocab), len(vocab)))

# Compute pairwise similarity for all words in the vocabulary
for i, word1 in enumerate(vocab):
    for j, word2 in enumerate(vocab):
        similarity_matrix[i, j] = word2vec_model.wv.similarity(word1, word2)

# Convert similarity matrix into a pandas DataFrame for easy visualization
similarity_df = pd.DataFrame(similarity_matrix, index=vocab, columns=vocab)

# Display the first few rows of the similarity matrix
print(similarity_df.head())

# ============================
# NEURAL NETWORK PART
# ============================

# Convert tokens to Word2Vec embeddings
word_embeddings = np.array([word2vec_model.wv[word] for word in tokens if word in word2vec_model.wv])

# Aggregate Word2Vec embeddings by averaging
average_embedding = np.mean(word_embeddings, axis=0)

# Reshape the averaged embedding to match the input expected by the model
average_embedding = average_embedding.reshape(1, -1)  # Reshape to (1, 100)

# For demo purposes, we create a mock label (you can replace it with your real labels)
labels = np.array([1])  # Assuming binary classification (0 or 1), change based on your data

# Define the ANN model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(average_embedding.shape[1],)),  # Input layer based on averaged Word2Vec vector size
    tf.keras.layers.Dense(64, activation='relu'),   # First hidden layer with 64 neurons
    tf.keras.layers.Dense(32, activation='relu'),   # Second hidden layer with 32 neurons
    tf.keras.layers.Dense(1, activation='sigmoid')  # Output layer (binary classification)
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model on the averaged Word2Vec embedding
history = model.fit(average_embedding, labels, epochs=10, verbose=1)

# Evaluate model performance
loss, accuracy = model.evaluate(average_embedding, labels, verbose=1)
print(f"\nFinal loss: {loss}")
print(f"Final accuracy: {accuracy}")
```

Cell output, condensed:

- `similarity_df.head()` prints the first five rows of a 651-column word-similarity DataFrame over the stemmed vocabulary (e.g. similarity(`one`, `child`) ≈ -0.0056).
- A Keras `UserWarning` from `keras/src/layers/core/input_layer.py`: "Argument `input_shape` is deprecated. Use `shape` instead."
- `model.summary()` for `"sequential_2"`:

```text
| Layer (type)    | Output Shape | Param # |
|-----------------+--------------+---------|
| dense_6 (Dense) | (None, 64)   |   6,464 |
| dense_7 (Dense) | (None, 32)   |   2,080 |
| dense_8 (Dense) | (None, 1)    |      33 |

Total params: 8,577 (33.50 KB); trainable: 8,577; non-trainable: 0 (0.00 B)
```

- Training log: loss falls from 0.6929 (epoch 1/10) to 0.6690 (epoch 10/10) with accuracy 1.0000 throughout; the final evaluation reports loss 0.6660556197166443 and accuracy 1.0. Since the model is fit on a single averaged vector with one mock label, the perfect accuracy is trivial.

A trailing empty code cell closes the notebook. Metadata: Python 3 (ipykernel) kernel, Python 3.11.7, nbformat 4 (minor 5).
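The `UserWarning` in the cell's output comes from passing `input_shape` to `InputLayer`, which Keras 3 deprecates in favor of `shape`. A minimal sketch of the warning-free equivalent, assuming the same architecture and the notebook's 100-dimensional Word2Vec vectors:

```python
import tensorflow as tf

# Same three-layer classifier, but with an explicit Input layer instead of
# InputLayer(input_shape=...), which is what triggers the deprecation warning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(100,)),  # 100 = Word2Vec vector_size used above
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
```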