noshot 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/ML/1. PCA - EDA/PCA-EDA.ipynb +207 -0
- noshot/data/ML TS XAI/ML/1. PCA - EDA/balance-scale.csv +626 -0
- noshot/data/ML TS XAI/ML/1. PCA - EDA/input.txt +625 -0
- noshot/data/ML TS XAI/ML/2. KNN Classifier/KNN.ipynb +287 -0
- noshot/data/ML TS XAI/ML/2. KNN Classifier/balance-scale.csv +626 -0
- noshot/data/ML TS XAI/ML/2. KNN Classifier/input.txt +625 -0
- noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/LDA.ipynb +83 -0
- noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/balance-scale.csv +626 -0
- noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/input.txt +625 -0
- noshot/data/ML TS XAI/ML/4. Linear Regression/Linear-Regression.ipynb +117 -0
- noshot/data/ML TS XAI/ML/4. Linear Regression/machine-data.csv +210 -0
- noshot/data/ML TS XAI/ML/5. Logistic Regression/Logistic-Regression.ipynb +137 -0
- noshot/data/ML TS XAI/ML/5. Logistic Regression/wine-dataset.csv +179 -0
- noshot/data/ML TS XAI/ML/6. Bayesian Classifier/Bayesian.ipynb +129 -0
- noshot/data/ML TS XAI/ML/6. Bayesian Classifier/wine-dataset.csv +179 -0
- {noshot-0.1.7.dist-info → noshot-0.1.8.dist-info}/METADATA +2 -2
- noshot-0.1.8.dist-info/RECORD +24 -0
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +0 -112
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +0 -111
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +0 -134
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +0 -115
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +0 -123
- noshot/data/ML TS XAI/AIDS/10. ANOVA/2_ANOVA.csv +0 -769
- noshot/data/ML TS XAI/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +0 -126
- noshot/data/ML TS XAI/AIDS/10. ANOVA/One Way ANOVA.ipynb +0 -134
- noshot/data/ML TS XAI/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +0 -119
- noshot/data/ML TS XAI/AIDS/10. ANOVA/Two Way ANOVA.ipynb +0 -138
- noshot/data/ML TS XAI/AIDS/10. ANOVA/reaction_time.csv +0 -5
- noshot/data/ML TS XAI/AIDS/10. ANOVA/sample_data.csv +0 -16
- noshot/data/ML TS XAI/AIDS/10. ANOVA/sleep_deprivation.csv +0 -4
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/3_Linear.csv +0 -4802
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +0 -113
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +0 -118
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression.ipynb +0 -148
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/house_rate.csv +0 -22
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +0 -128
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/Logistic Regression.ipynb +0 -145
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/default.csv +0 -1001
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/hours_scores_records.csv +0 -101
- noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +0 -256
- noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +0 -157
- noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +0 -178
- noshot/data/ML TS XAI/AIDS/3. Genetic Algorithm/Genetic.ipynb +0 -95
- noshot/data/ML TS XAI/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +0 -74
- noshot/data/ML TS XAI/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +0 -103
- noshot/data/ML TS XAI/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +0 -182
- noshot/data/ML TS XAI/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +0 -120
- noshot/data/ML TS XAI/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +0 -125
- noshot/data/ML TS XAI/AIDS/7. Random Sampling/Random Sampling.ipynb +0 -73
- noshot/data/ML TS XAI/AIDS/7. Random Sampling/height_weight_bmi.csv +0 -8389
- noshot/data/ML TS XAI/AIDS/8. Z Test/Z Test Hash Function.ipynb +0 -141
- noshot/data/ML TS XAI/AIDS/8. Z Test/Z Test.ipynb +0 -151
- noshot/data/ML TS XAI/AIDS/8. Z Test/height_weight_bmi.csv +0 -8389
- noshot/data/ML TS XAI/AIDS/9. T Test/1_heart.csv +0 -304
- noshot/data/ML TS XAI/AIDS/9. T Test/Independent T Test.ipynb +0 -119
- noshot/data/ML TS XAI/AIDS/9. T Test/Paired T Test.ipynb +0 -118
- noshot/data/ML TS XAI/AIDS/9. T Test/T Test Hash Function.ipynb +0 -142
- noshot/data/ML TS XAI/AIDS/9. T Test/T Test.ipynb +0 -158
- noshot/data/ML TS XAI/AIDS/9. T Test/height_weight_bmi.csv +0 -8389
- noshot/data/ML TS XAI/AIDS/9. T Test/iq_test.csv +0 -0
- noshot/data/ML TS XAI/AIDS/Others (AllinOne)/All In One.ipynb +0 -4581
- noshot/data/ML TS XAI/CN/1. Chat Application/chat.java +0 -81
- noshot/data/ML TS XAI/CN/1. Chat Application/output.png +0 -0
- noshot/data/ML TS XAI/CN/1. Chat Application/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +0 -65
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +0 -44
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +0 -229
- noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
- noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/2. File Transfer/file_to_send.txt +0 -2
- noshot/data/ML TS XAI/CN/2. File Transfer/filetransfer.java +0 -119
- noshot/data/ML TS XAI/CN/2. File Transfer/output.png +0 -0
- noshot/data/ML TS XAI/CN/2. File Transfer/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/Client.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/MyServerImpl.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/MyServerIntf.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/Server.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/rmi.java +0 -56
- noshot/data/ML TS XAI/CN/4. Wired Network/output.png +0 -0
- noshot/data/ML TS XAI/CN/4. Wired Network/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/4. Wired Network/wired.awk +0 -25
- noshot/data/ML TS XAI/CN/4. Wired Network/wired.tcl +0 -81
- noshot/data/ML TS XAI/CN/5. Wireless Network/output.png +0 -0
- noshot/data/ML TS XAI/CN/5. Wireless Network/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/5. Wireless Network/wireless.awk +0 -27
- noshot/data/ML TS XAI/CN/5. Wireless Network/wireless.tcl +0 -153
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +0 -27
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +0 -86
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +0 -86
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +0 -28
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +0 -78
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +0 -79
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +0 -27
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +0 -163
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/DV.tcl +0 -111
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/LS.tcl +0 -106
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/analysis.awk +0 -36
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/output.png +0 -0
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/analysis.awk +0 -20
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/broadcast.tcl +0 -76
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/multicast.tcl +0 -103
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/output.png +0 -0
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/9. DHCP/DHCP.java +0 -125
- noshot/data/ML TS XAI/CN/9. DHCP/output.png +0 -0
- noshot/data/ML TS XAI/CN/9. DHCP/procedure.png +0 -0
- noshot/data/ML TS XAI/NLP/NLP 1/1-Prereqs.py +0 -18
- noshot/data/ML TS XAI/NLP/NLP 1/2-Chi2test.py +0 -83
- noshot/data/ML TS XAI/NLP/NLP 1/2-T-test.py +0 -79
- noshot/data/ML TS XAI/NLP/NLP 1/3-WSD-nb.py +0 -53
- noshot/data/ML TS XAI/NLP/NLP 1/4-Hindle-Rooth.py +0 -53
- noshot/data/ML TS XAI/NLP/NLP 1/5-HMM-Trellis.py +0 -82
- noshot/data/ML TS XAI/NLP/NLP 1/6-HMM-Viterbi.py +0 -16
- noshot/data/ML TS XAI/NLP/NLP 1/7-PCFG-parsetree.py +0 -15
- noshot/data/ML TS XAI/NLP/NLP 1/Chi2test.ipynb +0 -285
- noshot/data/ML TS XAI/NLP/NLP 1/Hindle-Rooth.ipynb +0 -179
- noshot/data/ML TS XAI/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +0 -1461
- noshot/data/ML TS XAI/NLP/NLP 1/Lab 11 NMT.ipynb +0 -2307
- noshot/data/ML TS XAI/NLP/NLP 1/PCFG.ipynb +0 -134
- noshot/data/ML TS XAI/NLP/NLP 1/Prereqs.ipynb +0 -131
- noshot/data/ML TS XAI/NLP/NLP 1/T test.ipynb +0 -252
- noshot/data/ML TS XAI/NLP/NLP 1/TFIDF BOW.ipynb +0 -171
- noshot/data/ML TS XAI/NLP/NLP 1/Trellis.ipynb +0 -244
- noshot/data/ML TS XAI/NLP/NLP 1/WSD.ipynb +0 -645
- noshot/data/ML TS XAI/NLP/NLP 1/Word2Vec.ipynb +0 -93
- noshot/data/ML TS XAI/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +0 -370
- noshot/data/ML TS XAI/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +0 -6
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +0 -274
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +0 -905
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/test.txt +0 -1
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +0 -272
- noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +0 -332
- noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +0 -549
- noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +0 -1
- noshot/data/ML TS XAI/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +0 -817
- noshot/data/ML TS XAI/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +0 -332
- noshot/data/ML TS XAI/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +0 -231
- noshot/data/ML TS XAI/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +0 -507
- noshot/data/ML TS XAI/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +0 -134
- noshot/data/ML TS XAI/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +0 -255
- noshot/data/ML TS XAI/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +0 -159
- noshot/data/ML TS XAI/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +0 -282
- noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +0 -670
- noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +0 -613
- noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +0 -74
- noshot/data/ML TS XAI/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +0 -480
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +0 -445
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +0 -105
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +0 -87
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +0 -11
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +0 -83
- noshot/data/ML TS XAI/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +0 -201
- noshot/data/ML TS XAI/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +0 -185
- noshot/data/ML TS XAI/NLP/NLP 3/Backward-Procedure.ipynb +0 -597
- noshot/data/ML TS XAI/NLP/NLP 3/Bag_of.ipynb +0 -1422
- noshot/data/ML TS XAI/NLP/NLP 3/CYK-algorithm.ipynb +0 -1067
- noshot/data/ML TS XAI/NLP/NLP 3/Forward-Procedure.ipynb +0 -477
- noshot/data/ML TS XAI/NLP/NLP 3/LSTM.ipynb +0 -1290
- noshot/data/ML TS XAI/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +0 -1461
- noshot/data/ML TS XAI/NLP/NLP 3/Lab 11 NMT.ipynb +0 -2307
- noshot/data/ML TS XAI/NLP/NLP 3/NLP-LAB-4.ipynb +0 -216
- noshot/data/ML TS XAI/NLP/NLP 3/NLP-LAB-5.ipynb +0 -216
- noshot/data/ML TS XAI/NLP/NLP 3/abc.txt +0 -6
- noshot/data/ML TS XAI/NLP/NLP 3/ex-1-nltk.ipynb +0 -711
- noshot/data/ML TS XAI/NLP/NLP 3/ex-2-nlp.ipynb +0 -267
- noshot/data/ML TS XAI/NLP/NLP 3/exp8&9.ipynb +0 -305
- noshot/data/ML TS XAI/NLP/NLP 3/hind.ipynb +0 -287
- noshot/data/ML TS XAI/NLP/NLP 3/lab66.ipynb +0 -752
- noshot/data/ML TS XAI/NLP/NLP 3/leb_3.ipynb +0 -612
- noshot/data/ML TS XAI/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
- noshot/data/ML TS XAI/NLP/NLP 3/nlp_leb_1.ipynb +0 -3008
- noshot/data/ML TS XAI/NLP/NLP 3/nlp_leb_2.ipynb +0 -3095
- noshot/data/ML TS XAI/NLP/NLP 3/nlplab-9.ipynb +0 -295
- noshot/data/ML TS XAI/NLP/NLP 3/nltk-ex-4.ipynb +0 -506
- noshot/data/ML TS XAI/NLP/NLP 3/text1.txt +0 -48
- noshot/data/ML TS XAI/NLP/NLP 3/text2.txt +0 -8
- noshot/data/ML TS XAI/NLP/NLP 3/text3.txt +0 -48
- noshot/data/ML TS XAI/NLP/NLP 3/translation-rnn.ipynb +0 -812
- noshot/data/ML TS XAI/NLP/NLP 3/word2vector.ipynb +0 -173
- noshot/data/ML TS XAI/NLP/NLP 4/Backward Procedure Algorithm.ipynb +0 -179
- noshot/data/ML TS XAI/NLP/NLP 4/Chi Square Collocation.ipynb +0 -208
- noshot/data/ML TS XAI/NLP/NLP 4/Collocation (T test).ipynb +0 -188
- noshot/data/ML TS XAI/NLP/NLP 4/Experiment 1.ipynb +0 -437
- noshot/data/ML TS XAI/NLP/NLP 4/Forward Procedure Algorithm.ipynb +0 -132
- noshot/data/ML TS XAI/NLP/NLP 4/Hindle Rooth.ipynb +0 -414
- noshot/data/ML TS XAI/NLP/NLP 4/MachineTranslation.ipynb +0 -368
- noshot/data/ML TS XAI/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +0 -86
- noshot/data/ML TS XAI/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +0 -112
- noshot/data/ML TS XAI/NLP/NLP 4/PCFG Inside Probability.ipynb +0 -451
- noshot/data/ML TS XAI/NLP/NLP 4/Text Generation using LSTM.ipynb +0 -297
- noshot/data/ML TS XAI/NLP/NLP 4/Viterbi.ipynb +0 -310
- noshot/data/ML TS XAI/NLP/NLP 4/Word Sense Disambiguation.ipynb +0 -335
- noshot/data/ML TS XAI/NLP/NLP 5/10.Text Generation using LSTM.ipynb +0 -316
- noshot/data/ML TS XAI/NLP/NLP 5/11.Machine Translation.ipynb +0 -868
- noshot/data/ML TS XAI/NLP/NLP 5/2.T and Chi2 Test.ipynb +0 -204
- noshot/data/ML TS XAI/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +0 -234
- noshot/data/ML TS XAI/NLP/NLP 5/4.Hinddle and Rooth.ipynb +0 -128
- noshot/data/ML TS XAI/NLP/NLP 5/5.Forward and Backward.ipynb +0 -149
- noshot/data/ML TS XAI/NLP/NLP 5/6.Viterbi.ipynb +0 -111
- noshot/data/ML TS XAI/NLP/NLP 5/7.PCFG Parse Tree.ipynb +0 -134
- noshot/data/ML TS XAI/NLP/NLP 5/7.PCFG using cyk.ipynb +0 -101
- noshot/data/ML TS XAI/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +0 -310
- noshot/data/ML TS XAI/NLP/NLP 5/9.Word2Vector.ipynb +0 -78
- noshot/data/ML TS XAI/NLP/NLP 5/NLP ALL In One.ipynb +0 -2619
- noshot/data/ML TS XAI/NLP/NLP 5/sample1.txt +0 -15
- noshot/data/ML TS XAI/NLP/NLP 5/sample2.txt +0 -4
- noshot/data/ML TS XAI/NLP/NLP 5/word2vec_model.bin +0 -0
- noshot/data/ML TS XAI/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +0 -312
- noshot/data/ML TS XAI/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +0 -185
- noshot/data/ML TS XAI/NLP/NLP 6/3. Naive Bayes WSD.ipynb +0 -199
- noshot/data/ML TS XAI/NLP/NLP 6/4. Hinddle and Rooth.ipynb +0 -151
- noshot/data/ML TS XAI/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +0 -164
- noshot/data/ML TS XAI/NLP/NLP 6/7. PCFG using CYK.ipynb +0 -383
- noshot/data/ML TS XAI/NLP/NLP 6/8. BOW and TF-IDF.ipynb +0 -252
- noshot/data/ML TS XAI/Ubuntu CN Lab.iso +0 -0
- noshot-0.1.7.dist-info/RECORD +0 -216
- {noshot-0.1.7.dist-info → noshot-0.1.8.dist-info}/LICENSE.txt +0 -0
- {noshot-0.1.7.dist-info → noshot-0.1.8.dist-info}/WHEEL +0 -0
- {noshot-0.1.7.dist-info → noshot-0.1.8.dist-info}/top_level.txt +0 -0
@@ -1,817 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "markdown",
|
5
|
-
"id": "bac2330f",
|
6
|
-
"metadata": {},
|
7
|
-
"source": [
|
8
|
-
"Code credits: [**V Tarun Thothadri**](https://github.com/TarunThothadri)"
|
9
|
-
]
|
10
|
-
},
|
11
|
-
{
|
12
|
-
"cell_type": "code",
|
13
|
-
"execution_count": 1,
|
14
|
-
"id": "3969787f",
|
15
|
-
"metadata": {},
|
16
|
-
"outputs": [],
|
17
|
-
"source": [
|
18
|
-
"import nltk\n",
|
19
|
-
"import re\n",
|
20
|
-
"from nltk.corpus import wordnet"
|
21
|
-
]
|
22
|
-
},
|
23
|
-
{
|
24
|
-
"cell_type": "markdown",
|
25
|
-
"id": "f4abdabf",
|
26
|
-
"metadata": {},
|
27
|
-
"source": [
|
28
|
-
"# Collocation"
|
29
|
-
]
|
30
|
-
},
|
31
|
-
{
|
32
|
-
"cell_type": "code",
|
33
|
-
"execution_count": 2,
|
34
|
-
"id": "675c4111",
|
35
|
-
"metadata": {},
|
36
|
-
"outputs": [],
|
37
|
-
"source": [
|
38
|
-
"text = \"The computer is a miraculous scientific invention. It is a can store data and perform complex calculations in a very short period of time. Its primary characteristics are speed and accuracy. As a result, computers facilitate quick transactions and communication. It’s now common in schools, colleges, libraries, hospitals, offices, and banks. It is used in factories to save manual and mental labour as well as for quality control. A computer is a miraculous scientific invention. It is also used in various fields of research, such as space research. Computer courses at various levels are now being introduced in schools and colleges. The use of computers in specific fields is required for efficient service. Newspapers are the mirror of the world. It plays an important role in modern civilization. Newspapers are published in different languages in our country. It helps us to gain common sense. Provides reviews and puzzles on newspaper reports, speeches, business, movies, sports, etc. Newspapers are read for both profit and pleasure. It brings to the public valuable perspectives, acts of injustice, oppression and maladministration. It angered the people and criticized the authorities for their failures. It forms public opinion. Therefore freedom of the press should not be hampered. Coal is a valuable hard, black material extracted from mines. Wood that has been buried for a long time becomes coal due to a chemical change. Earthquakes cause vast forest areas to sink underground and contribute to such changes as a result of tremendous heat and pressure. Coal mines can be found in our country at Dhanbad, Jharia, Giridih, Chaibasa, and other locations. Coal is exported from India to Japan, Nepal, and Bangladesh. Coal is used as a fuel in both homes and factories and industries. The majority of trains and steamers move by burning coal in steam engines.\""
|
39
|
-
]
|
40
|
-
},
|
41
|
-
{
|
42
|
-
"cell_type": "code",
|
43
|
-
"execution_count": 3,
|
44
|
-
"id": "57bc2cd8",
|
45
|
-
"metadata": {},
|
46
|
-
"outputs": [],
|
47
|
-
"source": [
|
48
|
-
"R_patterns = [\n",
|
49
|
-
" (r'won\\'t', 'will not'),\n",
|
50
|
-
" (r'can\\'t', 'cannot'),\n",
|
51
|
-
" (r'i\\'m', 'i am'),\n",
|
52
|
-
" (r'(\\w+)\\'ll', '\\g<1> will'),\n",
|
53
|
-
" (r'(\\w+)n\\'t', '\\g<1> not'),\n",
|
54
|
-
" (r'(\\w+)\\'ve', '\\g<1> have'),\n",
|
55
|
-
" (r'(\\w+)\\'s', '\\g<1> is'),\n",
|
56
|
-
" (r'(\\w+)\\'re', '\\g<1> are'),\n",
|
57
|
-
"]"
|
58
|
-
]
|
59
|
-
},
|
60
|
-
{
|
61
|
-
"cell_type": "code",
|
62
|
-
"execution_count": 4,
|
63
|
-
"id": "23adb6ec",
|
64
|
-
"metadata": {},
|
65
|
-
"outputs": [],
|
66
|
-
"source": [
|
67
|
-
"class REReplacer(object):\n",
|
68
|
-
" def __init__(self, pattern = R_patterns):\n",
|
69
|
-
" self.pattern = [(re.compile(regex), repl) for (regex, repl) in pattern]\n",
|
70
|
-
" def replace(self, text):\n",
|
71
|
-
" s = text\n",
|
72
|
-
" for (pattern, repl) in self.pattern:\n",
|
73
|
-
" s = re.sub(pattern, repl, s)\n",
|
74
|
-
" return s"
|
75
|
-
]
|
76
|
-
},
|
77
|
-
{
|
78
|
-
"cell_type": "code",
|
79
|
-
"execution_count": 5,
|
80
|
-
"id": "0a0ee2ba",
|
81
|
-
"metadata": {},
|
82
|
-
"outputs": [],
|
83
|
-
"source": [
|
84
|
-
"rep_word = REReplacer()\n",
|
85
|
-
"text = rep_word.replace(text)"
|
86
|
-
]
|
87
|
-
},
|
88
|
-
{
|
89
|
-
"cell_type": "code",
|
90
|
-
"execution_count": 49,
|
91
|
-
"id": "e8833f53",
|
92
|
-
"metadata": {},
|
93
|
-
"outputs": [],
|
94
|
-
"source": [
|
95
|
-
"text = text.replace(',','')\n",
|
96
|
-
"text = text.replace('.','')\n",
|
97
|
-
"text = text.replace('\\'','')\n",
|
98
|
-
"text = text.replace('\"','')\n",
|
99
|
-
"text = text.replace('’','')\n",
|
100
|
-
"text = text.replace('\\n','')\n",
|
101
|
-
"text = text.lower()"
|
102
|
-
]
|
103
|
-
},
|
104
|
-
{
|
105
|
-
"cell_type": "code",
|
106
|
-
"execution_count": 33,
|
107
|
-
"id": "b6307deb",
|
108
|
-
"metadata": {},
|
109
|
-
"outputs": [
|
110
|
-
{
|
111
|
-
"data": {
|
112
|
-
"text/plain": [
|
113
|
-
"['the',\n",
|
114
|
-
" 'computer',\n",
|
115
|
-
" 'is',\n",
|
116
|
-
" 'a',\n",
|
117
|
-
" 'miraculous',\n",
|
118
|
-
" 'scientific',\n",
|
119
|
-
" 'invention',\n",
|
120
|
-
" 'it',\n",
|
121
|
-
" 'is',\n",
|
122
|
-
" 'a',\n",
|
123
|
-
" 'can',\n",
|
124
|
-
" 'store',\n",
|
125
|
-
" 'data',\n",
|
126
|
-
" 'and',\n",
|
127
|
-
" 'perform',\n",
|
128
|
-
" 'complex',\n",
|
129
|
-
" 'calculations',\n",
|
130
|
-
" 'in',\n",
|
131
|
-
" 'a',\n",
|
132
|
-
" 'very',\n",
|
133
|
-
" 'short',\n",
|
134
|
-
" 'period',\n",
|
135
|
-
" 'of',\n",
|
136
|
-
" 'time',\n",
|
137
|
-
" 'its',\n",
|
138
|
-
" 'primary',\n",
|
139
|
-
" 'characteristics',\n",
|
140
|
-
" 'are',\n",
|
141
|
-
" 'speed',\n",
|
142
|
-
" 'and',\n",
|
143
|
-
" 'accuracy',\n",
|
144
|
-
" 'as',\n",
|
145
|
-
" 'a',\n",
|
146
|
-
" 'result',\n",
|
147
|
-
" 'computers',\n",
|
148
|
-
" 'facilitate',\n",
|
149
|
-
" 'quick',\n",
|
150
|
-
" 'transactions',\n",
|
151
|
-
" 'and',\n",
|
152
|
-
" 'communication',\n",
|
153
|
-
" 'its',\n",
|
154
|
-
" 'now',\n",
|
155
|
-
" 'common',\n",
|
156
|
-
" 'in',\n",
|
157
|
-
" 'schools',\n",
|
158
|
-
" 'colleges',\n",
|
159
|
-
" 'libraries',\n",
|
160
|
-
" 'hospitals',\n",
|
161
|
-
" 'offices',\n",
|
162
|
-
" 'and',\n",
|
163
|
-
" 'banks',\n",
|
164
|
-
" 'it',\n",
|
165
|
-
" 'is',\n",
|
166
|
-
" 'used',\n",
|
167
|
-
" 'in',\n",
|
168
|
-
" 'factories',\n",
|
169
|
-
" 'to',\n",
|
170
|
-
" 'save',\n",
|
171
|
-
" 'manual',\n",
|
172
|
-
" 'and',\n",
|
173
|
-
" 'mental',\n",
|
174
|
-
" 'labour',\n",
|
175
|
-
" 'as',\n",
|
176
|
-
" 'well',\n",
|
177
|
-
" 'as',\n",
|
178
|
-
" 'for',\n",
|
179
|
-
" 'quality',\n",
|
180
|
-
" 'control',\n",
|
181
|
-
" 'a',\n",
|
182
|
-
" 'computer',\n",
|
183
|
-
" 'is',\n",
|
184
|
-
" 'a',\n",
|
185
|
-
" 'miraculous',\n",
|
186
|
-
" 'scientific',\n",
|
187
|
-
" 'invention',\n",
|
188
|
-
" 'it',\n",
|
189
|
-
" 'is',\n",
|
190
|
-
" 'also',\n",
|
191
|
-
" 'used',\n",
|
192
|
-
" 'in',\n",
|
193
|
-
" 'various',\n",
|
194
|
-
" 'fields',\n",
|
195
|
-
" 'of',\n",
|
196
|
-
" 'research',\n",
|
197
|
-
" 'such',\n",
|
198
|
-
" 'as',\n",
|
199
|
-
" 'space',\n",
|
200
|
-
" 'research',\n",
|
201
|
-
" 'computer',\n",
|
202
|
-
" 'courses',\n",
|
203
|
-
" 'at',\n",
|
204
|
-
" 'various',\n",
|
205
|
-
" 'levels',\n",
|
206
|
-
" 'are',\n",
|
207
|
-
" 'now',\n",
|
208
|
-
" 'being',\n",
|
209
|
-
" 'introduced',\n",
|
210
|
-
" 'in',\n",
|
211
|
-
" 'schools',\n",
|
212
|
-
" 'and',\n",
|
213
|
-
" 'colleges',\n",
|
214
|
-
" 'the',\n",
|
215
|
-
" 'use',\n",
|
216
|
-
" 'of',\n",
|
217
|
-
" 'computers',\n",
|
218
|
-
" 'in',\n",
|
219
|
-
" 'specific',\n",
|
220
|
-
" 'fields',\n",
|
221
|
-
" 'is',\n",
|
222
|
-
" 'required',\n",
|
223
|
-
" 'for',\n",
|
224
|
-
" 'efficient',\n",
|
225
|
-
" 'service',\n",
|
226
|
-
" 'newspapers',\n",
|
227
|
-
" 'are',\n",
|
228
|
-
" 'the',\n",
|
229
|
-
" 'mirror',\n",
|
230
|
-
" 'of',\n",
|
231
|
-
" 'the',\n",
|
232
|
-
" 'world',\n",
|
233
|
-
" 'it',\n",
|
234
|
-
" 'plays',\n",
|
235
|
-
" 'an',\n",
|
236
|
-
" 'important',\n",
|
237
|
-
" 'role',\n",
|
238
|
-
" 'in',\n",
|
239
|
-
" 'modern',\n",
|
240
|
-
" 'civilization',\n",
|
241
|
-
" 'newspapers',\n",
|
242
|
-
" 'are',\n",
|
243
|
-
" 'published',\n",
|
244
|
-
" 'in',\n",
|
245
|
-
" 'different',\n",
|
246
|
-
" 'languages',\n",
|
247
|
-
" '\\u200b\\u200bin',\n",
|
248
|
-
" 'our',\n",
|
249
|
-
" 'country',\n",
|
250
|
-
" 'it',\n",
|
251
|
-
" 'helps',\n",
|
252
|
-
" 'us',\n",
|
253
|
-
" 'to',\n",
|
254
|
-
" 'gain',\n",
|
255
|
-
" 'common',\n",
|
256
|
-
" 'sense',\n",
|
257
|
-
" 'provides',\n",
|
258
|
-
" 'reviews',\n",
|
259
|
-
" 'and',\n",
|
260
|
-
" 'puzzles',\n",
|
261
|
-
" 'on',\n",
|
262
|
-
" 'newspaper',\n",
|
263
|
-
" 'reports',\n",
|
264
|
-
" 'speeches',\n",
|
265
|
-
" 'business',\n",
|
266
|
-
" 'movies',\n",
|
267
|
-
" 'sports',\n",
|
268
|
-
" 'etc',\n",
|
269
|
-
" 'newspapers',\n",
|
270
|
-
" 'are',\n",
|
271
|
-
" 'read',\n",
|
272
|
-
" 'for',\n",
|
273
|
-
" 'both',\n",
|
274
|
-
" 'profit',\n",
|
275
|
-
" 'and',\n",
|
276
|
-
" 'pleasure',\n",
|
277
|
-
" 'it',\n",
|
278
|
-
" 'brings',\n",
|
279
|
-
" 'to',\n",
|
280
|
-
" 'the',\n",
|
281
|
-
" 'public',\n",
|
282
|
-
" 'valuable',\n",
|
283
|
-
" 'perspectives',\n",
|
284
|
-
" 'acts',\n",
|
285
|
-
" 'of',\n",
|
286
|
-
" 'injustice',\n",
|
287
|
-
" 'oppression',\n",
|
288
|
-
" 'and',\n",
|
289
|
-
" 'maladministration',\n",
|
290
|
-
" 'it',\n",
|
291
|
-
" 'angered',\n",
|
292
|
-
" 'the',\n",
|
293
|
-
" 'people',\n",
|
294
|
-
" 'and',\n",
|
295
|
-
" 'criticized',\n",
|
296
|
-
" 'the',\n",
|
297
|
-
" 'authorities',\n",
|
298
|
-
" 'for',\n",
|
299
|
-
" 'their',\n",
|
300
|
-
" 'failures',\n",
|
301
|
-
" 'it',\n",
|
302
|
-
" 'forms',\n",
|
303
|
-
" 'public',\n",
|
304
|
-
" 'opinion',\n",
|
305
|
-
" 'therefore',\n",
|
306
|
-
" 'freedom',\n",
|
307
|
-
" 'of',\n",
|
308
|
-
" 'the',\n",
|
309
|
-
" 'press',\n",
|
310
|
-
" 'should',\n",
|
311
|
-
" 'not',\n",
|
312
|
-
" 'be',\n",
|
313
|
-
" 'hampered',\n",
|
314
|
-
" 'coal',\n",
|
315
|
-
" 'is',\n",
|
316
|
-
" 'a',\n",
|
317
|
-
" 'valuable',\n",
|
318
|
-
" 'hard',\n",
|
319
|
-
" 'black',\n",
|
320
|
-
" 'material',\n",
|
321
|
-
" 'extracted',\n",
|
322
|
-
" 'from',\n",
|
323
|
-
" 'mines',\n",
|
324
|
-
" 'wood',\n",
|
325
|
-
" 'that',\n",
|
326
|
-
" 'has',\n",
|
327
|
-
" 'been',\n",
|
328
|
-
" 'buried',\n",
|
329
|
-
" 'for',\n",
|
330
|
-
" 'a',\n",
|
331
|
-
" 'long',\n",
|
332
|
-
" 'time',\n",
|
333
|
-
" 'becomes',\n",
|
334
|
-
" 'coal',\n",
|
335
|
-
" 'due',\n",
|
336
|
-
" 'to',\n",
|
337
|
-
" 'a',\n",
|
338
|
-
" 'chemical',\n",
|
339
|
-
" 'change',\n",
|
340
|
-
" 'earthquakes',\n",
|
341
|
-
" 'cause',\n",
|
342
|
-
" 'vast',\n",
|
343
|
-
" 'forest',\n",
|
344
|
-
" 'areas',\n",
|
345
|
-
" 'to',\n",
|
346
|
-
" 'sink',\n",
|
347
|
-
" 'underground',\n",
|
348
|
-
" 'and',\n",
|
349
|
-
" 'contribute',\n",
|
350
|
-
" 'to',\n",
|
351
|
-
" 'such',\n",
|
352
|
-
" 'changes',\n",
|
353
|
-
" 'as',\n",
|
354
|
-
" 'a',\n",
|
355
|
-
" 'result',\n",
|
356
|
-
" 'of',\n",
|
357
|
-
" 'tremendous',\n",
|
358
|
-
" 'heat',\n",
|
359
|
-
" 'and',\n",
|
360
|
-
" 'pressure',\n",
|
361
|
-
" 'coal',\n",
|
362
|
-
" 'mines',\n",
|
363
|
-
" 'can',\n",
|
364
|
-
" 'be',\n",
|
365
|
-
" 'found',\n",
|
366
|
-
" 'in',\n",
|
367
|
-
" 'our',\n",
|
368
|
-
" 'country',\n",
|
369
|
-
" 'at',\n",
|
370
|
-
" 'dhanbad',\n",
|
371
|
-
" 'jharia',\n",
|
372
|
-
" 'giridih',\n",
|
373
|
-
" 'chaibasa',\n",
|
374
|
-
" 'and',\n",
|
375
|
-
" 'other',\n",
|
376
|
-
" 'locations',\n",
|
377
|
-
" 'coal',\n",
|
378
|
-
" 'is',\n",
|
379
|
-
" 'exported',\n",
|
380
|
-
" 'from',\n",
|
381
|
-
" 'india',\n",
|
382
|
-
" 'to',\n",
|
383
|
-
" 'japan',\n",
|
384
|
-
" 'nepal',\n",
|
385
|
-
" 'and',\n",
|
386
|
-
" 'bangladesh',\n",
|
387
|
-
" 'coal',\n",
|
388
|
-
" 'is',\n",
|
389
|
-
" 'used',\n",
|
390
|
-
" 'as',\n",
|
391
|
-
" 'a',\n",
|
392
|
-
" 'fuel',\n",
|
393
|
-
" 'in',\n",
|
394
|
-
" 'both',\n",
|
395
|
-
" 'homes',\n",
|
396
|
-
" 'and',\n",
|
397
|
-
" 'factories',\n",
|
398
|
-
" 'and',\n",
|
399
|
-
" 'industries',\n",
|
400
|
-
" 'the',\n",
|
401
|
-
" 'majority',\n",
|
402
|
-
" 'of',\n",
|
403
|
-
" 'trains',\n",
|
404
|
-
" 'and',\n",
|
405
|
-
" 'steamers',\n",
|
406
|
-
" 'move',\n",
|
407
|
-
" 'by',\n",
|
408
|
-
" 'burning',\n",
|
409
|
-
" 'coal',\n",
|
410
|
-
" 'in',\n",
|
411
|
-
" 'steam',\n",
|
412
|
-
" 'engines']"
|
413
|
-
]
|
414
|
-
},
|
415
|
-
"execution_count": 33,
|
416
|
-
"metadata": {},
|
417
|
-
"output_type": "execute_result"
|
418
|
-
}
|
419
|
-
],
|
420
|
-
"source": [
|
421
|
-
"from nltk.tokenize import TreebankWordTokenizer\n",
|
422
|
-
"tokenizer_wrd = TreebankWordTokenizer()\n",
|
423
|
-
"tokenized = tokenizer_wrd.tokenize(text)\n",
|
424
|
-
"tokenized"
|
425
|
-
]
|
426
|
-
},
|
427
|
-
{
|
428
|
-
"cell_type": "code",
|
429
|
-
"execution_count": 8,
|
430
|
-
"id": "10cac8f7",
|
431
|
-
"metadata": {},
|
432
|
-
"outputs": [],
|
433
|
-
"source": [
|
434
|
-
"d = {}\n",
|
435
|
-
"for word in tokenized:\n",
|
436
|
-
" if word not in d:\n",
|
437
|
-
" d[word] = 1\n",
|
438
|
-
" else:\n",
|
439
|
-
" d[word] += 1"
|
440
|
-
]
|
441
|
-
},
|
442
|
-
{
|
443
|
-
"cell_type": "code",
|
444
|
-
"execution_count": 37,
|
445
|
-
"id": "aa070a0a",
|
446
|
-
"metadata": {},
|
447
|
-
"outputs": [
|
448
|
-
{
|
449
|
-
"name": "stdout",
|
450
|
-
"output_type": "stream",
|
451
|
-
"text": [
|
452
|
-
"The number of words : 300\n",
|
453
|
-
"The number of unique words : 178\n",
|
454
|
-
"Frequencies of the words.....\n",
|
455
|
-
" \n"
|
456
|
-
]
|
457
|
-
}
|
458
|
-
],
|
459
|
-
"source": [
|
460
|
-
"nuws = len(d)\n",
|
461
|
-
"nws = len(tokenized)\n",
|
462
|
-
"print(\"The number of words : \",str(nws))\n",
|
463
|
-
"print(\"The number of unique words : \",str(nuws))\n",
|
464
|
-
"print(\"Frequencies of the words.....\")\n",
|
465
|
-
"print(\" \")\n",
|
466
|
-
"\n",
|
467
|
-
"#for i in d:\n",
|
468
|
-
"# print(i,\" \",str(d[i]))"
|
469
|
-
]
|
470
|
-
},
|
471
|
-
{
|
472
|
-
"cell_type": "code",
|
473
|
-
"execution_count": 38,
|
474
|
-
"id": "5a2a0e98",
|
475
|
-
"metadata": {},
|
476
|
-
"outputs": [
|
477
|
-
{
|
478
|
-
"name": "stdout",
|
479
|
-
"output_type": "stream",
|
480
|
-
"text": [
|
481
|
-
"Maximum probability bigram pair ....\n",
|
482
|
-
"is a 4\n",
|
483
|
-
"it is 3\n",
|
484
|
-
"as a 3\n"
|
485
|
-
]
|
486
|
-
}
|
487
|
-
],
|
488
|
-
"source": [
|
489
|
-
"#Bigram\n",
|
490
|
-
"jd = {}\n",
|
491
|
-
"for i in range(len(tokenized)):\n",
|
492
|
-
" w1 = i\n",
|
493
|
-
" w2 = i+1\n",
|
494
|
-
" if(w2 != len(tokenized)):\n",
|
495
|
-
" count = 0\n",
|
496
|
-
" wd1 = tokenized[w1]\n",
|
497
|
-
" wd2 = tokenized[w2]\n",
|
498
|
-
" for j in range(len(tokenized)):\n",
|
499
|
-
" if(tokenized[j] == wd1 and tokenized[j+1] == wd2):\n",
|
500
|
-
" count += 1\n",
|
501
|
-
" jd[str(wd1 + \" \" + wd2)] = count\n",
|
502
|
-
"'''for i in jd:\n",
|
503
|
-
" print(\"Joint probability of \",i,\" : \",str(jd[i]/nws))'''\n",
|
504
|
-
" \n",
|
505
|
-
"maxp = 0\n",
|
506
|
-
"maxq = 0\n",
|
507
|
-
"maxr = 0\n",
|
508
|
-
"for i in jd:\n",
|
509
|
-
" if jd[i] > maxp:\n",
|
510
|
-
" maxp = jd[i]\n",
|
511
|
-
" elif jd[i] >maxq and jd[i]<= maxp:\n",
|
512
|
-
" maxq = jd[i]\n",
|
513
|
-
" elif jd[i] > maxr and jd[i] <= maxr and jd[i] <= maxp:\n",
|
514
|
-
" maxr = jd[i]\n",
|
515
|
-
"print(\"Maximum probability bigram pair ....\")\n",
|
516
|
-
"j = 0\n",
|
517
|
-
"top_3 = []\n",
|
518
|
-
"for i in jd:\n",
|
519
|
-
" if jd[i] == maxp:\n",
|
520
|
-
" print(i,' ',str(jd[i]))\n",
|
521
|
-
" top_3.append(i)\n",
|
522
|
-
" j += 1\n",
|
523
|
-
" if(j>=3):\n",
|
524
|
-
" break\n",
|
525
|
-
" elif jd[i] == maxq:\n",
|
526
|
-
" print(i,' ',str(jd[i]))\n",
|
527
|
-
" top_3.append(i)\n",
|
528
|
-
" j += 1\n",
|
529
|
-
" if(j>=3):\n",
|
530
|
-
" break\n",
|
531
|
-
" elif jd[i] == maxr:\n",
|
532
|
-
" print(i,' ',str(jd[i]))\n",
|
533
|
-
" top_3.append(i)\n",
|
534
|
-
" j += 1\n",
|
535
|
-
" if(j>=3):\n",
|
536
|
-
" break"
|
537
|
-
]
|
538
|
-
},
|
539
|
-
{
|
540
|
-
"cell_type": "code",
|
541
|
-
"execution_count": 39,
|
542
|
-
"id": "6ead6a26",
|
543
|
-
"metadata": {},
|
544
|
-
"outputs": [
|
545
|
-
{
|
546
|
-
"name": "stdout",
|
547
|
-
"output_type": "stream",
|
548
|
-
"text": [
|
549
|
-
"Maximum probability trigram pair ....\n",
|
550
|
-
"computer is a 2\n",
|
551
|
-
"is a miraculous 2\n",
|
552
|
-
"a miraculous scientific 2\n",
|
553
|
-
"miraculous scientific invention 2\n",
|
554
|
-
"scientific invention it 2\n",
|
555
|
-
"invention it is 2\n",
|
556
|
-
"as a result 2\n"
|
557
|
-
]
|
558
|
-
}
|
559
|
-
],
|
560
|
-
"source": [
|
561
|
-
"# Trigram: count how many times each run of three adjacent tokens occurs\n",
|
562
|
-
"td = {}\n",
|
563
|
-
"for i in range(len(tokenized)):\n",
|
564
|
-
" w1 = i\n",
|
565
|
-
" w2 = i+1\n",
|
566
|
-
" w3 = i+2\n",
|
567
|
-
" if(w2!= len(tokenized) and w3 != len(tokenized)):\n",
|
568
|
-
" count = 0\n",
|
569
|
-
" wd1 = tokenized[w1]\n",
|
570
|
-
" wd2 = tokenized[w2]\n",
|
571
|
-
" wd3 = tokenized[w3]\n",
|
572
|
-
" for j in range(len(tokenized)):\n",
|
573
|
-
" if(tokenized[j] == wd1 and tokenized[j+1] == wd2 and tokenized[j+2] == wd3):\n",
|
574
|
-
" count += 1\n",
|
575
|
-
" td[str(wd1 + \" \" + wd2 + \" \" + wd3)] = count\n",
|
576
|
-
"'''for i in td:\n",
|
577
|
-
" print(\"Joint probability of \",i,\" : \",str(td[i]/nws))'''\n",
|
578
|
-
"\n",
|
579
|
-
" \n",
|
580
|
-
"maxp = 0\n",
|
581
|
-
"for i in td:\n",
|
582
|
-
" if td[i] > maxp:\n",
|
583
|
-
" maxp = td[i]\n",
|
584
|
-
"print(\"Maximum probability trigram pair ....\")\n",
|
585
|
-
"for i in td:\n",
|
586
|
-
" if td[i] == maxp:\n",
|
587
|
-
" print(i,' ',str(td[i]))"
|
588
|
-
]
|
589
|
-
},
|
590
|
-
{
|
591
|
-
"cell_type": "code",
|
592
|
-
"execution_count": 40,
|
593
|
-
"id": "e44a7339",
|
594
|
-
"metadata": {},
|
595
|
-
"outputs": [
|
596
|
-
{
|
597
|
-
"name": "stdout",
|
598
|
-
"output_type": "stream",
|
599
|
-
"text": [
|
600
|
-
"Maximum probability 4gram pair ....\n",
|
601
|
-
"computer is a miraculous 2\n",
|
602
|
-
"is a miraculous scientific 2\n",
|
603
|
-
"a miraculous scientific invention 2\n",
|
604
|
-
"miraculous scientific invention it 2\n",
|
605
|
-
"scientific invention it is 2\n"
|
606
|
-
]
|
607
|
-
}
|
608
|
-
],
|
609
|
-
"source": [
|
610
|
-
"# 4-gram: count how many times each run of four adjacent tokens occurs\n",
|
611
|
-
"fd = {}\n",
|
612
|
-
"for i in range(len(tokenized)):\n",
|
613
|
-
" w1 = i\n",
|
614
|
-
" w2 = i+1\n",
|
615
|
-
" w3 = i+2\n",
|
616
|
-
" w4 = i+3\n",
|
617
|
-
" if(w2!= len(tokenized) and w3 != len(tokenized) and w3 != len(tokenized) and w4 != len(tokenized)):\n",
|
618
|
-
" count = 0\n",
|
619
|
-
" wd1 = tokenized[w1]\n",
|
620
|
-
" wd2 = tokenized[w2]\n",
|
621
|
-
" wd3 = tokenized[w3]\n",
|
622
|
-
" wd4 = tokenized[w4]\n",
|
623
|
-
" for j in range(len(tokenized)):\n",
|
624
|
-
" if(tokenized[j] == wd1 and tokenized[j+1] == wd2 and tokenized[j+2] == wd3 and tokenized[j+3] == wd4):\n",
|
625
|
-
" count += 1\n",
|
626
|
-
" fd[str(wd1 + \" \" + wd2 + \" \" + wd3 + \" \" + wd4)] = count\n",
|
627
|
-
"'''for i in fd:\n",
|
628
|
-
" print(\"Joint probability of \",i,\" : \",str(fd[i]/nws))'''\n",
|
629
|
-
"\n",
|
630
|
-
" \n",
|
631
|
-
"maxp = 0\n",
|
632
|
-
"for i in fd:\n",
|
633
|
-
" if fd[i] > maxp:\n",
|
634
|
-
" maxp = fd[i]\n",
|
635
|
-
"print(\"Maximum probability 4gram pair ....\")\n",
|
636
|
-
"for i in fd:\n",
|
637
|
-
" if fd[i] == maxp:\n",
|
638
|
-
" print(i,' ',str(fd[i]))"
|
639
|
-
]
|
640
|
-
},
|
641
|
-
{
|
642
|
-
"cell_type": "code",
|
643
|
-
"execution_count": 41,
|
644
|
-
"id": "ae199e34",
|
645
|
-
"metadata": {},
|
646
|
-
"outputs": [
|
647
|
-
{
|
648
|
-
"name": "stdout",
|
649
|
-
"output_type": "stream",
|
650
|
-
"text": [
|
651
|
-
"Maximum probability 4gram pair ....\n",
|
652
|
-
"computer is a miraculous scientific 2\n",
|
653
|
-
"is a miraculous scientific invention 2\n",
|
654
|
-
"a miraculous scientific invention it 2\n",
|
655
|
-
"miraculous scientific invention it is 2\n"
|
656
|
-
]
|
657
|
-
}
|
658
|
-
],
|
659
|
-
"source": [
|
660
|
-
"# 5-gram: count how many times each run of five adjacent tokens occurs\n",
|
661
|
-
"fid = {}\n",
|
662
|
-
"for i in range(len(tokenized)):\n",
|
663
|
-
" w1 = i\n",
|
664
|
-
" w2 = i+1\n",
|
665
|
-
" w3 = i+2\n",
|
666
|
-
" w4 = i+3\n",
|
667
|
-
" w5 = i+4\n",
|
668
|
-
" if(w2!= len(tokenized) and w3 != len(tokenized) and w3 != len(tokenized) and w4 != len(tokenized) and w5 != len(tokenized)):\n",
|
669
|
-
" count = 0\n",
|
670
|
-
" wd1 = tokenized[w1]\n",
|
671
|
-
" wd2 = tokenized[w2]\n",
|
672
|
-
" wd3 = tokenized[w3]\n",
|
673
|
-
" wd4 = tokenized[w4]\n",
|
674
|
-
" wd5 = tokenized[w5]\n",
|
675
|
-
" for j in range(len(tokenized)):\n",
|
676
|
-
" if(tokenized[j] == wd1 and tokenized[j+1] == wd2 and tokenized[j+2] == wd3 and tokenized[j+3] == wd4 and tokenized[j+4] == wd5):\n",
|
677
|
-
" count += 1\n",
|
678
|
-
" fid[str(wd1 + \" \" + wd2 + \" \" + wd3 + \" \" + wd4 + \" \" + wd5)] = count\n",
|
679
|
-
"'''for i in fid:\n",
|
680
|
-
" print(\"Joint probability of \",i,\" : \",str(fid[i]/nws))'''\n",
|
681
|
-
"\n",
|
682
|
-
" \n",
|
683
|
-
"maxp = 0\n",
|
684
|
-
"for i in fid:\n",
|
685
|
-
" if fid[i] > maxp:\n",
|
686
|
-
" maxp = fid[i]\n",
|
687
|
-
"print(\"Maximum probability 5gram pair ....\")\n",
|
688
|
-
"for i in fid:\n",
|
689
|
-
" if fid[i] == maxp:\n",
|
690
|
-
" print(i,' ',str(fid[i]))"
|
691
|
-
]
|
692
|
-
},
|
693
|
-
{
|
694
|
-
"cell_type": "markdown",
|
695
|
-
"id": "c2facedf",
|
696
|
-
"metadata": {},
|
697
|
-
"source": [
|
698
|
-
"# t-test"
|
699
|
-
]
|
700
|
-
},
|
701
|
-
{
|
702
|
-
"cell_type": "code",
|
703
|
-
"execution_count": 42,
|
704
|
-
"id": "53dde70e",
|
705
|
-
"metadata": {},
|
706
|
-
"outputs": [
|
707
|
-
{
|
708
|
-
"name": "stdout",
|
709
|
-
"output_type": "stream",
|
710
|
-
"text": [
|
711
|
-
"The t score for 'is a' is : 1.8473570419766019\n",
|
712
|
-
"The t score for 'it is' is : 1.60151443479242\n",
|
713
|
-
"The t score for 'as a' is : 1.613119611856133\n"
|
714
|
-
]
|
715
|
-
}
|
716
|
-
],
|
717
|
-
"source": [
|
718
|
-
"for i in top_3:\n",
|
719
|
-
" wrd1 = i.split(' ')[0]\n",
|
720
|
-
" wrd2 = i.split(' ')[1]\n",
|
721
|
-
" p_wrd1 = float(d[wrd1]/nws)\n",
|
722
|
-
" p_wrd2 = float(d[wrd2]/nws)\n",
|
723
|
-
" mu = float(p_wrd1*p_wrd2)\n",
|
724
|
-
" mean = float(jd[i]/nws)\n",
|
725
|
-
" s_sq = float(mean*float(1-mean))\n",
|
726
|
-
" t_numerator = float(mean - mu)\n",
|
727
|
-
" t_denominator = float((s_sq/nws)**(0.5))\n",
|
728
|
-
" t_score = float(t_numerator/t_denominator)\n",
|
729
|
-
" print(\"The t score for '\"+i+\"' is : \"+str(t_score))\n",
|
730
|
-
" "
|
731
|
-
]
|
732
|
-
},
|
733
|
-
{
|
734
|
-
"cell_type": "markdown",
|
735
|
-
"id": "0c723ee4",
|
736
|
-
"metadata": {},
|
737
|
-
"source": [
|
738
|
-
"# Pearson's chi-square test"
|
739
|
-
]
|
740
|
-
},
|
741
|
-
{
|
742
|
-
"cell_type": "code",
|
743
|
-
"execution_count": 48,
|
744
|
-
"id": "efae5df1",
|
745
|
-
"metadata": {},
|
746
|
-
"outputs": [
|
747
|
-
{
|
748
|
-
"name": "stdout",
|
749
|
-
"output_type": "stream",
|
750
|
-
"text": [
|
751
|
-
"Observed : 4 7 5 284\n",
|
752
|
-
"Expected : 0.33 10.67 8.67 280.33\n",
|
753
|
-
"X^2 : 40.81484848484848 1.262314901593252 1.5535063437139562 0.04804658794991659\n",
|
754
|
-
"The chi-square score for 'is a' is : 43.67871631810561\n",
|
755
|
-
"Observed : 3 6 5 286\n",
|
756
|
-
"Expected : 0.24 8.76 7.76 283.24\n",
|
757
|
-
"X^2 : 31.739999999999995 0.8695890410958903 0.9816494845360824 0.026894506425645916\n",
|
758
|
-
"The chi-square score for 'it is' is : 33.61813303205762\n",
|
759
|
-
"Observed : 3 8 3 286\n",
|
760
|
-
"Expected : 0.22 10.78 5.78 283.22\n",
|
761
|
-
"X^2 : 35.129090909090905 0.716920222634508 1.3370934256055365 0.027287620930724694\n",
|
762
|
-
"The chi-square score for 'as a' is : 37.21039217826167\n"
|
763
|
-
]
|
764
|
-
}
|
765
|
-
],
|
766
|
-
"source": [
|
767
|
-
"for i in top_3:\n",
|
768
|
-
" wrd1 = i.split(' ')[0]\n",
|
769
|
-
" wrd2 = i.split(' ')[1]\n",
|
770
|
-
" c_wrd1 = d[wrd1]\n",
|
771
|
-
" c_n_wrd1 = nws - d[wrd1]\n",
|
772
|
-
" c_wrd2 = d[wrd2]\n",
|
773
|
-
" c_n_wrd2 = nws - d[wrd2]\n",
|
774
|
-
" o_w1w2 = jd[i]\n",
|
775
|
-
" o_nw1w2 = c_wrd2 - o_w1w2\n",
|
776
|
-
" o_w1nw2 = c_wrd1 - o_w1w2\n",
|
777
|
-
" o_nw1nw2 = c_n_wrd1 - o_nw1w2\n",
|
778
|
-
" e_w1w2 = float((c_wrd1*c_wrd2)/nws)\n",
|
779
|
-
" e_nw1w2 = float((c_n_wrd1*c_wrd2)/nws)\n",
|
780
|
-
" e_w1nw2 = float((c_n_wrd2*c_wrd1)/nws)\n",
|
781
|
-
" e_nw1nw2 = float((c_n_wrd2*c_n_wrd1)/nws)\n",
|
782
|
-
" x_w1w2 = float(((o_w1w2 - e_w1w2)**2)/e_w1w2)\n",
|
783
|
-
" x_nw1w2 = float(((o_nw1w2 - e_nw1w2)**2)/e_nw1w2)\n",
|
784
|
-
" x_w1nw2 = float(((o_w1nw2 - e_w1nw2)**2)/e_w1nw2)\n",
|
785
|
-
" x_nw1nw2 = float(((o_nw1nw2 - e_nw1nw2)**2)/e_nw1nw2)\n",
|
786
|
-
" print(\"Observed : \",o_w1w2,o_nw1w2,o_w1nw2,o_nw1nw2)\n",
|
787
|
-
" #print(\"Observed Total : \",o_w1w2+o_nw1w2+o_w1nw2+o_nw1nw2)\n",
|
788
|
-
" print(\"Expected : \",e_w1w2,e_nw1w2,e_w1nw2,e_nw1nw2)\n",
|
789
|
-
" #print(\"Expected Total : \",e_w1w2+e_nw1w2+e_w1nw2+e_nw1nw2)\n",
|
790
|
-
" chi_sq = float(x_w1w2 + x_nw1w2 + x_w1nw2 + x_nw1nw2)\n",
|
791
|
-
" print(\"X^2 : \",x_w1w2 , x_nw1w2 , x_w1nw2 , x_nw1nw2)\n",
|
792
|
-
" print(\"The chi-square score for '\"+i+\"' is : \"+str(chi_sq))"
|
793
|
-
]
|
794
|
-
}
|
795
|
-
],
|
796
|
-
"metadata": {
|
797
|
-
"kernelspec": {
|
798
|
-
"display_name": "Python 3 (ipykernel)",
|
799
|
-
"language": "python",
|
800
|
-
"name": "python3"
|
801
|
-
},
|
802
|
-
"language_info": {
|
803
|
-
"codemirror_mode": {
|
804
|
-
"name": "ipython",
|
805
|
-
"version": 3
|
806
|
-
},
|
807
|
-
"file_extension": ".py",
|
808
|
-
"mimetype": "text/x-python",
|
809
|
-
"name": "python",
|
810
|
-
"nbconvert_exporter": "python",
|
811
|
-
"pygments_lexer": "ipython3",
|
812
|
-
"version": "3.11.5"
|
813
|
-
}
|
814
|
-
},
|
815
|
-
"nbformat": 4,
|
816
|
-
"nbformat_minor": 5
|
817
|
-
}
|