noshot 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/ML/1. PCA - EDA/PCA-EDA.ipynb +207 -0
- noshot/data/ML TS XAI/ML/1. PCA - EDA/balance-scale.csv +626 -0
- noshot/data/ML TS XAI/ML/1. PCA - EDA/input.txt +625 -0
- noshot/data/ML TS XAI/ML/2. KNN Classifier/KNN.ipynb +287 -0
- noshot/data/ML TS XAI/ML/2. KNN Classifier/balance-scale.csv +626 -0
- noshot/data/ML TS XAI/ML/2. KNN Classifier/input.txt +625 -0
- noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/LDA.ipynb +83 -0
- noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/balance-scale.csv +626 -0
- noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/input.txt +625 -0
- noshot/data/ML TS XAI/ML/4. Linear Regression/Linear-Regression.ipynb +117 -0
- noshot/data/ML TS XAI/ML/4. Linear Regression/machine-data.csv +210 -0
- noshot/data/ML TS XAI/ML/5. Logistic Regression/Logistic-Regression.ipynb +137 -0
- noshot/data/ML TS XAI/ML/5. Logistic Regression/wine-dataset.csv +179 -0
- noshot/data/ML TS XAI/ML/6. Bayesian Classifier/Bayesian.ipynb +129 -0
- noshot/data/ML TS XAI/ML/6. Bayesian Classifier/wine-dataset.csv +179 -0
- noshot/data/ML TS XAI/TS/1. EDA - Handling Time Series Data/Handling TS Data.ipynb +784 -0
- noshot/data/ML TS XAI/TS/1. EDA - Handling Time Series Data/raw_sales.csv +29581 -0
- noshot/data/ML TS XAI/TS/2. Feature Engineering/Feature Engineering-.ipynb +1445 -0
- noshot/data/ML TS XAI/TS/3. Temporal Relationships/Exploring Temporal Relationships.ipynb +603 -0
- noshot/data/ML TS XAI/TS/4. Up-Down-Sampling and Interploation/Up-Down-Sampling.ipynb +721 -0
- noshot/data/ML TS XAI/TS/4. Up-Down-Sampling and Interploation/shampoo_sales.csv +37 -0
- noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality/Stationarity-Trend-Seasonality.ipynb +392 -0
- noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality/daily-min-temperatures.csv +3651 -0
- noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality/daily-total-female-births.csv +366 -0
- noshot/data/ML TS XAI/TS/6. Autocorrelation - Partial Autocorrelation/ACF-PACF.ipynb +175 -0
- noshot/data/ML TS XAI/TS/6. Autocorrelation - Partial Autocorrelation/daily-min-temperatures.csv +3651 -0
- {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/METADATA +2 -2
- noshot-0.1.9.dist-info/RECORD +35 -0
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +0 -112
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +0 -111
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +0 -134
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +0 -115
- noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +0 -123
- noshot/data/ML TS XAI/AIDS/10. ANOVA/2_ANOVA.csv +0 -769
- noshot/data/ML TS XAI/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +0 -126
- noshot/data/ML TS XAI/AIDS/10. ANOVA/One Way ANOVA.ipynb +0 -134
- noshot/data/ML TS XAI/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +0 -119
- noshot/data/ML TS XAI/AIDS/10. ANOVA/Two Way ANOVA.ipynb +0 -138
- noshot/data/ML TS XAI/AIDS/10. ANOVA/reaction_time.csv +0 -5
- noshot/data/ML TS XAI/AIDS/10. ANOVA/sample_data.csv +0 -16
- noshot/data/ML TS XAI/AIDS/10. ANOVA/sleep_deprivation.csv +0 -4
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/3_Linear.csv +0 -4802
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +0 -113
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +0 -118
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression.ipynb +0 -148
- noshot/data/ML TS XAI/AIDS/11. Linear Regression/house_rate.csv +0 -22
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +0 -128
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/Logistic Regression.ipynb +0 -145
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/default.csv +0 -1001
- noshot/data/ML TS XAI/AIDS/12. Logistic Regression/hours_scores_records.csv +0 -101
- noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +0 -256
- noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +0 -157
- noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +0 -178
- noshot/data/ML TS XAI/AIDS/3. Genetic Algorithm/Genetic.ipynb +0 -95
- noshot/data/ML TS XAI/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +0 -74
- noshot/data/ML TS XAI/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +0 -103
- noshot/data/ML TS XAI/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +0 -182
- noshot/data/ML TS XAI/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +0 -120
- noshot/data/ML TS XAI/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +0 -125
- noshot/data/ML TS XAI/AIDS/7. Random Sampling/Random Sampling.ipynb +0 -73
- noshot/data/ML TS XAI/AIDS/7. Random Sampling/height_weight_bmi.csv +0 -8389
- noshot/data/ML TS XAI/AIDS/8. Z Test/Z Test Hash Function.ipynb +0 -141
- noshot/data/ML TS XAI/AIDS/8. Z Test/Z Test.ipynb +0 -151
- noshot/data/ML TS XAI/AIDS/8. Z Test/height_weight_bmi.csv +0 -8389
- noshot/data/ML TS XAI/AIDS/9. T Test/1_heart.csv +0 -304
- noshot/data/ML TS XAI/AIDS/9. T Test/Independent T Test.ipynb +0 -119
- noshot/data/ML TS XAI/AIDS/9. T Test/Paired T Test.ipynb +0 -118
- noshot/data/ML TS XAI/AIDS/9. T Test/T Test Hash Function.ipynb +0 -142
- noshot/data/ML TS XAI/AIDS/9. T Test/T Test.ipynb +0 -158
- noshot/data/ML TS XAI/AIDS/9. T Test/height_weight_bmi.csv +0 -8389
- noshot/data/ML TS XAI/AIDS/9. T Test/iq_test.csv +0 -0
- noshot/data/ML TS XAI/AIDS/Others (AllinOne)/All In One.ipynb +0 -4581
- noshot/data/ML TS XAI/CN/1. Chat Application/chat.java +0 -81
- noshot/data/ML TS XAI/CN/1. Chat Application/output.png +0 -0
- noshot/data/ML TS XAI/CN/1. Chat Application/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +0 -65
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +0 -44
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
- noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +0 -229
- noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
- noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/2. File Transfer/file_to_send.txt +0 -2
- noshot/data/ML TS XAI/CN/2. File Transfer/filetransfer.java +0 -119
- noshot/data/ML TS XAI/CN/2. File Transfer/output.png +0 -0
- noshot/data/ML TS XAI/CN/2. File Transfer/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/Client.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/MyServerImpl.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/MyServerIntf.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/Server.class +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/rmi.java +0 -56
- noshot/data/ML TS XAI/CN/4. Wired Network/output.png +0 -0
- noshot/data/ML TS XAI/CN/4. Wired Network/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/4. Wired Network/wired.awk +0 -25
- noshot/data/ML TS XAI/CN/4. Wired Network/wired.tcl +0 -81
- noshot/data/ML TS XAI/CN/5. Wireless Network/output.png +0 -0
- noshot/data/ML TS XAI/CN/5. Wireless Network/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/5. Wireless Network/wireless.awk +0 -27
- noshot/data/ML TS XAI/CN/5. Wireless Network/wireless.tcl +0 -153
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +0 -27
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +0 -86
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +0 -86
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +0 -28
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +0 -78
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +0 -79
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +0 -27
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +0 -163
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
- noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/DV.tcl +0 -111
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/LS.tcl +0 -106
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/analysis.awk +0 -36
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/output.png +0 -0
- noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/analysis.awk +0 -20
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/broadcast.tcl +0 -76
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/multicast.tcl +0 -103
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/output.png +0 -0
- noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
- noshot/data/ML TS XAI/CN/9. DHCP/DHCP.java +0 -125
- noshot/data/ML TS XAI/CN/9. DHCP/output.png +0 -0
- noshot/data/ML TS XAI/CN/9. DHCP/procedure.png +0 -0
- noshot/data/ML TS XAI/NLP/NLP 1/1-Prereqs.py +0 -18
- noshot/data/ML TS XAI/NLP/NLP 1/2-Chi2test.py +0 -83
- noshot/data/ML TS XAI/NLP/NLP 1/2-T-test.py +0 -79
- noshot/data/ML TS XAI/NLP/NLP 1/3-WSD-nb.py +0 -53
- noshot/data/ML TS XAI/NLP/NLP 1/4-Hindle-Rooth.py +0 -53
- noshot/data/ML TS XAI/NLP/NLP 1/5-HMM-Trellis.py +0 -82
- noshot/data/ML TS XAI/NLP/NLP 1/6-HMM-Viterbi.py +0 -16
- noshot/data/ML TS XAI/NLP/NLP 1/7-PCFG-parsetree.py +0 -15
- noshot/data/ML TS XAI/NLP/NLP 1/Chi2test.ipynb +0 -285
- noshot/data/ML TS XAI/NLP/NLP 1/Hindle-Rooth.ipynb +0 -179
- noshot/data/ML TS XAI/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +0 -1461
- noshot/data/ML TS XAI/NLP/NLP 1/Lab 11 NMT.ipynb +0 -2307
- noshot/data/ML TS XAI/NLP/NLP 1/PCFG.ipynb +0 -134
- noshot/data/ML TS XAI/NLP/NLP 1/Prereqs.ipynb +0 -131
- noshot/data/ML TS XAI/NLP/NLP 1/T test.ipynb +0 -252
- noshot/data/ML TS XAI/NLP/NLP 1/TFIDF BOW.ipynb +0 -171
- noshot/data/ML TS XAI/NLP/NLP 1/Trellis.ipynb +0 -244
- noshot/data/ML TS XAI/NLP/NLP 1/WSD.ipynb +0 -645
- noshot/data/ML TS XAI/NLP/NLP 1/Word2Vec.ipynb +0 -93
- noshot/data/ML TS XAI/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +0 -370
- noshot/data/ML TS XAI/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +0 -6
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +0 -274
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +0 -905
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/test.txt +0 -1
- noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +0 -272
- noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +0 -332
- noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +0 -549
- noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +0 -1
- noshot/data/ML TS XAI/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +0 -817
- noshot/data/ML TS XAI/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +0 -332
- noshot/data/ML TS XAI/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +0 -231
- noshot/data/ML TS XAI/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +0 -507
- noshot/data/ML TS XAI/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +0 -134
- noshot/data/ML TS XAI/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +0 -255
- noshot/data/ML TS XAI/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +0 -159
- noshot/data/ML TS XAI/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +0 -282
- noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +0 -670
- noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +0 -613
- noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +0 -74
- noshot/data/ML TS XAI/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +0 -480
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +0 -445
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +0 -105
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +0 -87
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +0 -11
- noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +0 -83
- noshot/data/ML TS XAI/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +0 -201
- noshot/data/ML TS XAI/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +0 -185
- noshot/data/ML TS XAI/NLP/NLP 3/Backward-Procedure.ipynb +0 -597
- noshot/data/ML TS XAI/NLP/NLP 3/Bag_of.ipynb +0 -1422
- noshot/data/ML TS XAI/NLP/NLP 3/CYK-algorithm.ipynb +0 -1067
- noshot/data/ML TS XAI/NLP/NLP 3/Forward-Procedure.ipynb +0 -477
- noshot/data/ML TS XAI/NLP/NLP 3/LSTM.ipynb +0 -1290
- noshot/data/ML TS XAI/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +0 -1461
- noshot/data/ML TS XAI/NLP/NLP 3/Lab 11 NMT.ipynb +0 -2307
- noshot/data/ML TS XAI/NLP/NLP 3/NLP-LAB-4.ipynb +0 -216
- noshot/data/ML TS XAI/NLP/NLP 3/NLP-LAB-5.ipynb +0 -216
- noshot/data/ML TS XAI/NLP/NLP 3/abc.txt +0 -6
- noshot/data/ML TS XAI/NLP/NLP 3/ex-1-nltk.ipynb +0 -711
- noshot/data/ML TS XAI/NLP/NLP 3/ex-2-nlp.ipynb +0 -267
- noshot/data/ML TS XAI/NLP/NLP 3/exp8&9.ipynb +0 -305
- noshot/data/ML TS XAI/NLP/NLP 3/hind.ipynb +0 -287
- noshot/data/ML TS XAI/NLP/NLP 3/lab66.ipynb +0 -752
- noshot/data/ML TS XAI/NLP/NLP 3/leb_3.ipynb +0 -612
- noshot/data/ML TS XAI/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
- noshot/data/ML TS XAI/NLP/NLP 3/nlp_leb_1.ipynb +0 -3008
- noshot/data/ML TS XAI/NLP/NLP 3/nlp_leb_2.ipynb +0 -3095
- noshot/data/ML TS XAI/NLP/NLP 3/nlplab-9.ipynb +0 -295
- noshot/data/ML TS XAI/NLP/NLP 3/nltk-ex-4.ipynb +0 -506
- noshot/data/ML TS XAI/NLP/NLP 3/text1.txt +0 -48
- noshot/data/ML TS XAI/NLP/NLP 3/text2.txt +0 -8
- noshot/data/ML TS XAI/NLP/NLP 3/text3.txt +0 -48
- noshot/data/ML TS XAI/NLP/NLP 3/translation-rnn.ipynb +0 -812
- noshot/data/ML TS XAI/NLP/NLP 3/word2vector.ipynb +0 -173
- noshot/data/ML TS XAI/NLP/NLP 4/Backward Procedure Algorithm.ipynb +0 -179
- noshot/data/ML TS XAI/NLP/NLP 4/Chi Square Collocation.ipynb +0 -208
- noshot/data/ML TS XAI/NLP/NLP 4/Collocation (T test).ipynb +0 -188
- noshot/data/ML TS XAI/NLP/NLP 4/Experiment 1.ipynb +0 -437
- noshot/data/ML TS XAI/NLP/NLP 4/Forward Procedure Algorithm.ipynb +0 -132
- noshot/data/ML TS XAI/NLP/NLP 4/Hindle Rooth.ipynb +0 -414
- noshot/data/ML TS XAI/NLP/NLP 4/MachineTranslation.ipynb +0 -368
- noshot/data/ML TS XAI/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +0 -86
- noshot/data/ML TS XAI/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +0 -112
- noshot/data/ML TS XAI/NLP/NLP 4/PCFG Inside Probability.ipynb +0 -451
- noshot/data/ML TS XAI/NLP/NLP 4/Text Generation using LSTM.ipynb +0 -297
- noshot/data/ML TS XAI/NLP/NLP 4/Viterbi.ipynb +0 -310
- noshot/data/ML TS XAI/NLP/NLP 4/Word Sense Disambiguation.ipynb +0 -335
- noshot/data/ML TS XAI/NLP/NLP 5/10.Text Generation using LSTM.ipynb +0 -316
- noshot/data/ML TS XAI/NLP/NLP 5/11.Machine Translation.ipynb +0 -868
- noshot/data/ML TS XAI/NLP/NLP 5/2.T and Chi2 Test.ipynb +0 -204
- noshot/data/ML TS XAI/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +0 -234
- noshot/data/ML TS XAI/NLP/NLP 5/4.Hinddle and Rooth.ipynb +0 -128
- noshot/data/ML TS XAI/NLP/NLP 5/5.Forward and Backward.ipynb +0 -149
- noshot/data/ML TS XAI/NLP/NLP 5/6.Viterbi.ipynb +0 -111
- noshot/data/ML TS XAI/NLP/NLP 5/7.PCFG Parse Tree.ipynb +0 -134
- noshot/data/ML TS XAI/NLP/NLP 5/7.PCFG using cyk.ipynb +0 -101
- noshot/data/ML TS XAI/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +0 -310
- noshot/data/ML TS XAI/NLP/NLP 5/9.Word2Vector.ipynb +0 -78
- noshot/data/ML TS XAI/NLP/NLP 5/NLP ALL In One.ipynb +0 -2619
- noshot/data/ML TS XAI/NLP/NLP 5/sample1.txt +0 -15
- noshot/data/ML TS XAI/NLP/NLP 5/sample2.txt +0 -4
- noshot/data/ML TS XAI/NLP/NLP 5/word2vec_model.bin +0 -0
- noshot/data/ML TS XAI/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +0 -312
- noshot/data/ML TS XAI/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +0 -185
- noshot/data/ML TS XAI/NLP/NLP 6/3. Naive Bayes WSD.ipynb +0 -199
- noshot/data/ML TS XAI/NLP/NLP 6/4. Hinddle and Rooth.ipynb +0 -151
- noshot/data/ML TS XAI/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +0 -164
- noshot/data/ML TS XAI/NLP/NLP 6/7. PCFG using CYK.ipynb +0 -383
- noshot/data/ML TS XAI/NLP/NLP 6/8. BOW and TF-IDF.ipynb +0 -252
- noshot/data/ML TS XAI/Ubuntu CN Lab.iso +0 -0
- noshot-0.1.7.dist-info/RECORD +0 -216
- {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/LICENSE.txt +0 -0
- {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/WHEEL +0 -0
- {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/top_level.txt +0 -0
@@ -1,507 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "code",
|
5
|
-
"execution_count": 3,
|
6
|
-
"id": "aad973c4",
|
7
|
-
"metadata": {},
|
8
|
-
"outputs": [
|
9
|
-
{
|
10
|
-
"name": "stdout",
|
11
|
-
"output_type": "stream",
|
12
|
-
"text": [
|
13
|
-
"The sense of 'Chair' in the given text is: furniture\n"
|
14
|
-
]
|
15
|
-
}
|
16
|
-
],
|
17
|
-
"source": [
|
18
|
-
"import nltk\n",
|
19
|
-
"import math\n",
|
20
|
-
"from nltk.corpus import wordnet as wn\n",
|
21
|
-
"\n",
|
22
|
-
"# Define the senses and their priors\n",
|
23
|
-
"senses = {\n",
|
24
|
-
" 'furniture': 3/5,\n",
|
25
|
-
" 'position': 2/5\n",
|
26
|
-
"}\n",
|
27
|
-
"\n",
|
28
|
-
"# Read the input text from a file\n",
|
29
|
-
"def read_input_text(filename):\n",
|
30
|
-
" with open(filename, 'r') as file:\n",
|
31
|
-
" text = file.read()\n",
|
32
|
-
" return text\n",
|
33
|
-
"\n",
|
34
|
-
"# Tokenize the input text\n",
|
35
|
-
"def tokenize_text(text):\n",
|
36
|
-
" return nltk.word_tokenize(text)\n",
|
37
|
-
"\n",
|
38
|
-
"# Calculate the conditional probabilities for each sense\n",
|
39
|
-
"def calculate_conditional_probabilities(word, sense):\n",
|
40
|
-
" sense_count = len(wn.synsets(sense))\n",
|
41
|
-
" word_count = sense.count(word) + 1\n",
|
42
|
-
" v_size = 17 # Assuming V size is 17\n",
|
43
|
-
"\n",
|
44
|
-
" return math.log(senses[sense]) + math.log(word_count / (sense_count + v_size))\n",
|
45
|
-
"\n",
|
46
|
-
"# Calculate the sense scores\n",
|
47
|
-
"def calculate_sense_scores(tokens):\n",
|
48
|
-
" scores = {sense: 0 for sense in senses}\n",
|
49
|
-
"\n",
|
50
|
-
" for token in tokens:\n",
|
51
|
-
" for sense in senses:\n",
|
52
|
-
" scores[sense] += calculate_conditional_probabilities(token, sense)\n",
|
53
|
-
"\n",
|
54
|
-
" return scores\n",
|
55
|
-
"\n",
|
56
|
-
"# Determine the most probable sense\n",
|
57
|
-
"def disambiguate_word_sense(text):\n",
|
58
|
-
" tokens = tokenize_text(text)\n",
|
59
|
-
" sense_scores = calculate_sense_scores(tokens)\n",
|
60
|
-
"\n",
|
61
|
-
" # Find the sense with the highest score\n",
|
62
|
-
" max_sense = max(sense_scores, key=sense_scores.get)\n",
|
63
|
-
" return max_sense\n",
|
64
|
-
"\n",
|
65
|
-
"if __name__ == \"__main__\":\n",
|
66
|
-
" input_filename = 'input.txt'\n",
|
67
|
-
" input_text = read_input_text(input_filename)\n",
|
68
|
-
" result = disambiguate_word_sense(input_text)\n",
|
69
|
-
" print(f\"The sense of 'Chair' in the given text is: {result}\")\n"
|
70
|
-
]
|
71
|
-
},
|
72
|
-
{
|
73
|
-
"cell_type": "markdown",
|
74
|
-
"id": "6210aa4b",
|
75
|
-
"metadata": {},
|
76
|
-
"source": [
|
77
|
-
"**Input text (`input.txt`)**\n",
|
78
|
-
"\n",
|
79
|
-
"```txt\n",
|
80
|
-
"Put, coat, back, Chair, sat, down Furniture\n",
|
81
|
-
"Chair, made, timber, company Furniture\n",
|
82
|
-
"Chair, institute, best Position\n",
|
83
|
-
"Award, IT, Chair Position\n",
|
84
|
-
"Type, different, Chair, Award, fun, use Furniture\n",
|
85
|
-
"Award, Chair, IT, company ?\n",
|
86
|
-
"```"
|
87
|
-
]
|
88
|
-
},
|
89
|
-
{
|
90
|
-
"cell_type": "code",
|
91
|
-
"execution_count": 5,
|
92
|
-
"id": "9e59efe1",
|
93
|
-
"metadata": {},
|
94
|
-
"outputs": [
|
95
|
-
{
|
96
|
-
"name": "stdout",
|
97
|
-
"output_type": "stream",
|
98
|
-
"text": [
|
99
|
-
"The sense of 'Bass' in the last context is: Fish\n"
|
100
|
-
]
|
101
|
-
}
|
102
|
-
],
|
103
|
-
"source": [
|
104
|
-
"from collections import defaultdict\n",
|
105
|
-
"\n",
|
106
|
-
"# Define the senses and their priors\n",
|
107
|
-
"senses = {\n",
|
108
|
-
" 'Fish': 3/5,\n",
|
109
|
-
" 'music': 2/5\n",
|
110
|
-
"}\n",
|
111
|
-
"\n",
|
112
|
-
"# Laplace smoothing parameter\n",
|
113
|
-
"alpha = 1\n",
|
114
|
-
"\n",
|
115
|
-
"# Read the input file\n",
|
116
|
-
"def read_input_file(filename):\n",
|
117
|
-
" contexts = []\n",
|
118
|
-
" with open(filename, 'r') as file:\n",
|
119
|
-
" for line in file:\n",
|
120
|
-
" parts = line.strip().split(' ')\n",
|
121
|
-
" context = parts[0].split(', ')\n",
|
122
|
-
" sense = parts[1]\n",
|
123
|
-
" contexts.append((context, sense))\n",
|
124
|
-
" return contexts\n",
|
125
|
-
"\n",
|
126
|
-
"# Calculate the conditional probabilities with Laplace smoothing\n",
|
127
|
-
"def calculate_conditional_probabilities(word, sense, context_list):\n",
|
128
|
-
" sense_count = sum(1 for _, s in context_list if s == sense)\n",
|
129
|
-
" word_count = sum(1 for c, s in context_list if s == sense and word in c) + alpha\n",
|
130
|
-
" v_size = len(context_list[0][0]) # Assuming V size is equal to the context size\n",
|
131
|
-
"\n",
|
132
|
-
" return (word_count / (sense_count + v_size * alpha)) * senses[sense]\n",
|
133
|
-
"\n",
|
134
|
-
"# Determine the most probable sense using the Decision Rule\n",
|
135
|
-
"def disambiguate_word_sense(context, context_list):\n",
|
136
|
-
" sense_scores = {sense: 0 for sense in senses}\n",
|
137
|
-
" \n",
|
138
|
-
" for word in context:\n",
|
139
|
-
" for sense in senses:\n",
|
140
|
-
" sense_scores[sense] += calculate_conditional_probabilities(word, sense, context_list)\n",
|
141
|
-
"\n",
|
142
|
-
" # Find the sense with the highest score\n",
|
143
|
-
" max_sense = max(sense_scores, key=sense_scores.get)\n",
|
144
|
-
" return max_sense\n",
|
145
|
-
"\n",
|
146
|
-
"if __name__ == \"__main__\":\n",
|
147
|
-
" input_filename = 'input.txt' \n",
|
148
|
-
" input_data = read_input_file(input_filename)\n",
|
149
|
-
" context_to_disambiguate = [word.lower() for word in input_data[-1][0]] # Use the last context\n",
|
150
|
-
" \n",
|
151
|
-
" result = disambiguate_word_sense(context_to_disambiguate, input_data[:-1]) # Exclude the last context for training\n",
|
152
|
-
" print(f\"The sense of 'Bass' in the last context is: {result}\")\n"
|
153
|
-
]
|
154
|
-
},
|
155
|
-
{
|
156
|
-
"cell_type": "markdown",
|
157
|
-
"id": "4022ddef",
|
158
|
-
"metadata": {},
|
159
|
-
"source": [
|
160
|
-
"**Input text (`input1.txt`)**\n",
|
161
|
-
"\n",
|
162
|
-
"```txt\n",
|
163
|
-
"Bass, eat, super Fish\n",
|
164
|
-
"Bass, lunch, excellent Fish\n",
|
165
|
-
"Bass, ate, like\tFish\n",
|
166
|
-
"Bass, play, music music\n",
|
167
|
-
"Bass, interest, play music\n",
|
168
|
-
"Bass, super, excellent, play ?\n",
|
169
|
-
"```"
|
170
|
-
]
|
171
|
-
},
|
172
|
-
{
|
173
|
-
"cell_type": "code",
|
174
|
-
"execution_count": 9,
|
175
|
-
"id": "67fe0295",
|
176
|
-
"metadata": {},
|
177
|
-
"outputs": [
|
178
|
-
{
|
179
|
-
"name": "stdout",
|
180
|
-
"output_type": "stream",
|
181
|
-
"text": [
|
182
|
-
"The sense of 'Chair' in the given text is: furniture\n",
|
183
|
-
"Sense Scores:\n",
|
184
|
-
"furniture: -176.86226384643197\n",
|
185
|
-
"position: -232.52521725490584\n"
|
186
|
-
]
|
187
|
-
}
|
188
|
-
],
|
189
|
-
"source": [
|
190
|
-
"import nltk\n",
|
191
|
-
"import math\n",
|
192
|
-
"from nltk.corpus import wordnet as wn\n",
|
193
|
-
"\n",
|
194
|
-
"# Define the senses and their priors\n",
|
195
|
-
"senses = {\n",
|
196
|
-
" 'furniture': 3/5,\n",
|
197
|
-
" 'position': 2/5\n",
|
198
|
-
"}\n",
|
199
|
-
"\n",
|
200
|
-
"# Read the input text from a file\n",
|
201
|
-
"def read_input_text(filename):\n",
|
202
|
-
" with open(filename, 'r') as file:\n",
|
203
|
-
" text = file.read()\n",
|
204
|
-
" return text\n",
|
205
|
-
"\n",
|
206
|
-
"# Tokenize the input text\n",
|
207
|
-
"def tokenize_text(text):\n",
|
208
|
-
" return nltk.word_tokenize(text)\n",
|
209
|
-
"\n",
|
210
|
-
"# Calculate the conditional probabilities for each sense\n",
|
211
|
-
"def calculate_conditional_probabilities(word, sense):\n",
|
212
|
-
" sense_count = len(wn.synsets(sense))\n",
|
213
|
-
" word_count = sense.count(word) + 1\n",
|
214
|
-
" v_size = 17 # Assuming V size is 17\n",
|
215
|
-
"\n",
|
216
|
-
" return math.log(senses[sense]) + math.log(word_count / (sense_count + v_size))\n",
|
217
|
-
"\n",
|
218
|
-
"# Calculate the sense scores\n",
|
219
|
-
"def calculate_sense_scores(tokens):\n",
|
220
|
-
" scores = {sense: 0 for sense in senses}\n",
|
221
|
-
"\n",
|
222
|
-
" for token in tokens:\n",
|
223
|
-
" for sense in senses:\n",
|
224
|
-
" scores[sense] += calculate_conditional_probabilities(token, sense)\n",
|
225
|
-
"\n",
|
226
|
-
" return scores\n",
|
227
|
-
"\n",
|
228
|
-
"# Determine the most probable sense\n",
|
229
|
-
"def disambiguate_word_sense(text):\n",
|
230
|
-
" tokens = tokenize_text(text)\n",
|
231
|
-
" sense_scores = calculate_sense_scores(tokens)\n",
|
232
|
-
"\n",
|
233
|
-
" # Find the sense with the highest score\n",
|
234
|
-
" max_sense = max(sense_scores, key=sense_scores.get)\n",
|
235
|
-
" return max_sense, sense_scores # Return both the sense and the scores\n",
|
236
|
-
"\n",
|
237
|
-
"if __name__ == \"__main__\":\n",
|
238
|
-
" input_filename = 'input.txt'\n",
|
239
|
-
" input_text = read_input_text(input_filename)\n",
|
240
|
-
" result, scores = disambiguate_word_sense(input_text)\n",
|
241
|
-
" \n",
|
242
|
-
" print(f\"The sense of 'Chair' in the given text is: {result}\")\n",
|
243
|
-
" print(\"Sense Scores:\")\n",
|
244
|
-
" for sense, score in scores.items():\n",
|
245
|
-
" print(f\"{sense}: {score}\")"
|
246
|
-
]
|
247
|
-
},
|
248
|
-
{
|
249
|
-
"cell_type": "code",
|
250
|
-
"execution_count": 7,
|
251
|
-
"id": "4c53ed10",
|
252
|
-
"metadata": {},
|
253
|
-
"outputs": [
|
254
|
-
{
|
255
|
-
"name": "stdout",
|
256
|
-
"output_type": "stream",
|
257
|
-
"text": [
|
258
|
-
"The sense of 'Bass' in the last context is: Fish\n",
|
259
|
-
"Sense Scores:\n",
|
260
|
-
"Fish: 0.6\n",
|
261
|
-
"music: 0.4\n"
|
262
|
-
]
|
263
|
-
}
|
264
|
-
],
|
265
|
-
"source": [
|
266
|
-
"from collections import defaultdict\n",
|
267
|
-
"\n",
|
268
|
-
"# Define the senses and their priors\n",
|
269
|
-
"senses = {\n",
|
270
|
-
" 'Fish': 3/5,\n",
|
271
|
-
" 'music': 2/5\n",
|
272
|
-
"}\n",
|
273
|
-
"\n",
|
274
|
-
"# Laplace smoothing parameter\n",
|
275
|
-
"alpha = 1\n",
|
276
|
-
"\n",
|
277
|
-
"# Read the input file\n",
|
278
|
-
"def read_input_file(filename):\n",
|
279
|
-
" contexts = []\n",
|
280
|
-
" with open(filename, 'r') as file:\n",
|
281
|
-
" for line in file:\n",
|
282
|
-
" parts = line.strip().split(' ')\n",
|
283
|
-
" context = parts[0].split(', ')\n",
|
284
|
-
" sense = parts[1]\n",
|
285
|
-
" contexts.append((context, sense))\n",
|
286
|
-
" return contexts\n",
|
287
|
-
"\n",
|
288
|
-
"# Calculate the conditional probabilities with Laplace smoothing\n",
|
289
|
-
"def calculate_conditional_probabilities(word, sense, context_list):\n",
|
290
|
-
" sense_count = sum(1 for _, s in context_list if s == sense)\n",
|
291
|
-
" word_count = sum(1 for c, s in context_list if s == sense and word in c) + alpha\n",
|
292
|
-
" v_size = len(context_list[0][0]) # Assuming V size is equal to the context size\n",
|
293
|
-
"\n",
|
294
|
-
" return (word_count / (sense_count + v_size * alpha)) * senses[sense]\n",
|
295
|
-
"\n",
|
296
|
-
"# Determine the most probable sense using the Decision Rule\n",
|
297
|
-
"def disambiguate_word_sense(context, context_list):\n",
|
298
|
-
" sense_scores = {sense: 0 for sense in senses}\n",
|
299
|
-
" \n",
|
300
|
-
" for word in context:\n",
|
301
|
-
" for sense in senses:\n",
|
302
|
-
" sense_scores[sense] += calculate_conditional_probabilities(word, sense, context_list)\n",
|
303
|
-
"\n",
|
304
|
-
" # Find the sense with the highest score\n",
|
305
|
-
" max_sense = max(sense_scores, key=sense_scores.get)\n",
|
306
|
-
" return max_sense, sense_scores # Return both the sense and the scores\n",
|
307
|
-
"\n",
|
308
|
-
"if __name__ == \"__main__\":\n",
|
309
|
-
" input_filename = 'input.txt' \n",
|
310
|
-
" input_data = read_input_file(input_filename)\n",
|
311
|
-
" context_to_disambiguate = [word.lower() for word in input_data[-1][0]] # Use the last context\n",
|
312
|
-
" \n",
|
313
|
-
" result, scores = disambiguate_word_sense(context_to_disambiguate, input_data[:-1]) # Exclude the last context for training\n",
|
314
|
-
" print(f\"The sense of 'Bass' in the last context is: {result}\")\n",
|
315
|
-
" print(\"Sense Scores:\")\n",
|
316
|
-
" for sense, score in scores.items():\n",
|
317
|
-
" print(f\"{sense}: {score}\")"
|
318
|
-
]
|
319
|
-
},
|
320
|
-
{
|
321
|
-
"cell_type": "code",
|
322
|
-
"execution_count": 8,
|
323
|
-
"id": "21ab75a6",
|
324
|
-
"metadata": {},
|
325
|
-
"outputs": [
|
326
|
-
{
|
327
|
-
"name": "stdout",
|
328
|
-
"output_type": "stream",
|
329
|
-
"text": [
|
330
|
-
"The sense of 'Bass' in the last context is: Fish\n",
|
331
|
-
"Sense Scores:\n",
|
332
|
-
"Fish: 0.6\n",
|
333
|
-
"music: 0.4\n"
|
334
|
-
]
|
335
|
-
}
|
336
|
-
],
|
337
|
-
"source": [
|
338
|
-
"from collections import defaultdict\n",
|
339
|
-
"\n",
|
340
|
-
"# Read the input file\n",
|
341
|
-
"def read_input_file(filename):\n",
|
342
|
-
" contexts = []\n",
|
343
|
-
" with open(filename, 'r') as file:\n",
|
344
|
-
" for line in file:\n",
|
345
|
-
" parts = line.strip().split(' ')\n",
|
346
|
-
" context = parts[0].split(', ')\n",
|
347
|
-
" sense = parts[1]\n",
|
348
|
-
" contexts.append((context, sense))\n",
|
349
|
-
" return contexts\n",
|
350
|
-
"\n",
|
351
|
-
"# Calculate the conditional probabilities with Laplace smoothing\n",
|
352
|
-
"def calculate_conditional_probabilities(word, sense, context_list, senses, alpha):\n",
|
353
|
-
" sense_count = sum(1 for _, s in context_list if s == sense)\n",
|
354
|
-
" word_count = sum(1 for c, s in context_list if s == sense and word in c) + alpha\n",
|
355
|
-
" v_size = len(context_list[0][0]) # Assuming V size is equal to the context size\n",
|
356
|
-
"\n",
|
357
|
-
" return (word_count / (sense_count + v_size * alpha)) * senses[sense]\n",
|
358
|
-
"\n",
|
359
|
-
"# Determine the most probable sense using the Decision Rule\n",
|
360
|
-
"def disambiguate_word_sense(context, context_list, senses, alpha):\n",
|
361
|
-
" sense_scores = {sense: 0 for sense in senses}\n",
|
362
|
-
" \n",
|
363
|
-
" for word in context:\n",
|
364
|
-
" for sense in senses:\n",
|
365
|
-
" sense_scores[sense] += calculate_conditional_probabilities(word, sense, context_list, senses, alpha)\n",
|
366
|
-
"\n",
|
367
|
-
" # Find the sense with the highest score\n",
|
368
|
-
" max_sense = max(sense_scores, key=sense_scores.get)\n",
|
369
|
-
" return max_sense, sense_scores # Return both the sense and the scores\n",
|
370
|
-
"\n",
|
371
|
-
"if __name__ == \"__main__\":\n",
|
372
|
-
" # Input senses and priors\n",
|
373
|
-
" senses = {\n",
|
374
|
-
" 'Fish': 3/5,\n",
|
375
|
-
" 'music': 2/5\n",
|
376
|
-
" }\n",
|
377
|
-
"\n",
|
378
|
-
" # Laplace smoothing parameter\n",
|
379
|
-
" alpha = 1\n",
|
380
|
-
"\n",
|
381
|
-
" input_filename = 'input.txt' \n",
|
382
|
-
" input_data = read_input_file(input_filename)\n",
|
383
|
-
" context_to_disambiguate = [word.lower() for word in input_data[-1][0]] # Use the last context\n",
|
384
|
-
" \n",
|
385
|
-
" result, scores = disambiguate_word_sense(context_to_disambiguate, input_data[:-1], senses, alpha) # Exclude the last context for training\n",
|
386
|
-
" print(f\"The sense of 'Bass' in the last context is: {result}\")\n",
|
387
|
-
" print(\"Sense Scores:\")\n",
|
388
|
-
" for sense, score in scores.items():\n",
|
389
|
-
" print(f\"{sense}: {score}\")\n"
|
390
|
-
]
|
391
|
-
},
|
392
|
-
{
|
393
|
-
"cell_type": "code",
|
394
|
-
"execution_count": 11,
|
395
|
-
"id": "432badc4",
|
396
|
-
"metadata": {},
|
397
|
-
"outputs": [
|
398
|
-
{
|
399
|
-
"name": "stdout",
|
400
|
-
"output_type": "stream",
|
401
|
-
"text": [
|
402
|
-
"The sense of 'position' in the last context is: furniture\n",
|
403
|
-
"Sense Scores:\n",
|
404
|
-
"furniture: 0.6\n",
|
405
|
-
"position: 0.4\n"
|
406
|
-
]
|
407
|
-
}
|
408
|
-
],
|
409
|
-
"source": [
|
410
|
-
"from collections import defaultdict\n",
|
411
|
-
"\n",
|
412
|
-
"# Read the senses and their priors from an input file\n",
|
413
|
-
"def read_senses_from_file(filename):\n",
|
414
|
-
" senses = {}\n",
|
415
|
-
" with open(filename, 'r') as file:\n",
|
416
|
-
" for line in file:\n",
|
417
|
-
" parts = line.strip().split(' ')\n",
|
418
|
-
" sense = parts[0]\n",
|
419
|
-
" prior = float(parts[1])\n",
|
420
|
-
" senses[sense] = prior\n",
|
421
|
-
" return senses\n",
|
422
|
-
"\n",
|
423
|
-
"# Laplace smoothing parameter\n",
|
424
|
-
"alpha = 1\n",
|
425
|
-
"\n",
|
426
|
-
"# Read the input file\n",
|
427
|
-
"def read_input_file(filename):\n",
|
428
|
-
" contexts = []\n",
|
429
|
-
" with open(filename, 'r') as file:\n",
|
430
|
-
" for line in file:\n",
|
431
|
-
" parts = line.strip().split(' ')\n",
|
432
|
-
" context = parts[0].split(', ')\n",
|
433
|
-
" sense = parts[1]\n",
|
434
|
-
" contexts.append((context, sense))\n",
|
435
|
-
" return contexts\n",
|
436
|
-
"\n",
|
437
|
-
"# Calculate the conditional probabilities with Laplace smoothing\n",
|
438
|
-
"def calculate_conditional_probabilities(word, sense, context_list, senses):\n",
|
439
|
-
" sense_count = sum(1 for _, s in context_list if s == sense)\n",
|
440
|
-
" word_count = sum(1 for c, s in context_list if s == sense and word in c) + alpha\n",
|
441
|
-
" v_size = len(context_list[0][0]) # Assuming V size is equal to the context size\n",
|
442
|
-
"\n",
|
443
|
-
" return (word_count / (sense_count + v_size * alpha)) * senses[sense]\n",
|
444
|
-
"\n",
|
445
|
-
"# Determine the most probable sense using the Decision Rule\n",
|
446
|
-
"def disambiguate_word_sense(context, context_list, senses):\n",
|
447
|
-
" sense_scores = {sense: 0 for sense in senses}\n",
|
448
|
-
" \n",
|
449
|
-
" for word in context:\n",
|
450
|
-
" for sense in senses:\n",
|
451
|
-
" sense_scores[sense] += calculate_conditional_probabilities(word, sense, context_list, senses)\n",
|
452
|
-
"\n",
|
453
|
-
" # Find the sense with the highest score\n",
|
454
|
-
" max_sense = max(sense_scores, key=sense_scores.get)\n",
|
455
|
-
" return max_sense, sense_scores # Return both the sense and the scores\n",
|
456
|
-
"\n",
|
457
|
-
"if __name__ == \"__main__\":\n",
|
458
|
-
" input_filename = 'input.txt' # Replace with the actual input file name\n",
|
459
|
-
" senses_filename = 'senses_input.txt' # Replace with the actual senses input file name\n",
|
460
|
-
" \n",
|
461
|
-
" senses = read_senses_from_file(senses_filename)\n",
|
462
|
-
" input_data = read_input_file(input_filename)\n",
|
463
|
-
" context_to_disambiguate = [word.lower() for word in input_data[-1][0]] # Use the last context\n",
|
464
|
-
" \n",
|
465
|
-
" result, scores = disambiguate_word_sense(context_to_disambiguate, input_data[:-1], senses) # Exclude the last context for training\n",
|
466
|
-
" print(f\"The sense of '{sense}' in the last context is: {result}\")\n",
|
467
|
-
" print(\"Sense Scores:\")\n",
|
468
|
-
" for sense, score in scores.items():\n",
|
469
|
-
" print(f\"{sense}: {score}\")\n"
|
470
|
-
]
|
471
|
-
},
|
472
|
-
{
|
473
|
-
"cell_type": "markdown",
|
474
|
-
"id": "05ec022f",
|
475
|
-
"metadata": {},
|
476
|
-
"source": [
|
477
|
-
"**Input (`senses_input.txt`)**\n",
|
478
|
-
"\n",
|
479
|
-
"```txt\n",
|
480
|
-
"furniture 0.6\n",
|
481
|
-
"position 0.4\n",
|
482
|
-
"```"
|
483
|
-
]
|
484
|
-
}
|
485
|
-
],
|
486
|
-
"metadata": {
|
487
|
-
"kernelspec": {
|
488
|
-
"display_name": "Python 3",
|
489
|
-
"language": "python",
|
490
|
-
"name": "python3"
|
491
|
-
},
|
492
|
-
"language_info": {
|
493
|
-
"codemirror_mode": {
|
494
|
-
"name": "ipython",
|
495
|
-
"version": 3
|
496
|
-
},
|
497
|
-
"file_extension": ".py",
|
498
|
-
"mimetype": "text/x-python",
|
499
|
-
"name": "python",
|
500
|
-
"nbconvert_exporter": "python",
|
501
|
-
"pygments_lexer": "ipython3",
|
502
|
-
"version": "3.8.8"
|
503
|
-
}
|
504
|
-
},
|
505
|
-
"nbformat": 4,
|
506
|
-
"nbformat_minor": 5
|
507
|
-
}
|
@@ -1,134 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "markdown",
|
5
|
-
"id": "db74bf44",
|
6
|
-
"metadata": {},
|
7
|
-
"source": [
|
8
|
-
"### Todo\n",
|
9
|
-
"\n",
|
10
|
-
"1. Get text from a file\n",
|
11
|
-
"2. Get all possible bi grams\n",
|
12
|
-
"3. Apply smoothing(add-one,add-delta) for each bigram\n",
|
13
|
-
"4. Given random bi gram as an input\n",
|
14
|
-
"5. Print the probability of the given bi gram via all the three ,methods\n",
|
15
|
-
"\n",
|
16
|
-
"- add-one ---->laplace\n",
|
17
|
-
"- add-delta---->lidstone,jefrey's"
|
18
|
-
]
|
19
|
-
},
|
20
|
-
{
|
21
|
-
"cell_type": "markdown",
|
22
|
-
"id": "a0228d7f",
|
23
|
-
"metadata": {},
|
24
|
-
"source": [
|
25
|
-
"Input text\n",
|
26
|
-
"\n",
|
27
|
-
"`This is a sample text for testing the bigram probability calculation using smoothing techniques. The quick brown fox jumps over the lazy dog. The lazy dog barks loudly. Testing is essential for evaluating the performance of natural language processing models.`"
|
28
|
-
]
|
29
|
-
},
|
30
|
-
{
|
31
|
-
"cell_type": "code",
|
32
|
-
"execution_count": 5,
|
33
|
-
"id": "0ccf8972",
|
34
|
-
"metadata": {},
|
35
|
-
"outputs": [
|
36
|
-
{
|
37
|
-
"name": "stdout",
|
38
|
-
"output_type": "stream",
|
39
|
-
"text": [
|
40
|
-
"Probability using Laplace (add-one) smoothing: 0.029411764705882353\n",
|
41
|
-
"Probability using Lidstone (add-delta) with Jeffrey's smoothing: 0.029411764705882353\n"
|
42
|
-
]
|
43
|
-
}
|
44
|
-
],
|
45
|
-
"source": [
|
46
|
-
"from collections import defaultdict\n",
|
47
|
-
"import math\n",
|
48
|
-
"\n",
|
49
|
-
"# Step 1: Get text from a file\n",
|
50
|
-
"def read_text_from_file(filename):\n",
|
51
|
-
" with open(filename, 'r', encoding='utf-8') as file:\n",
|
52
|
-
" text = file.read()\n",
|
53
|
-
" return text\n",
|
54
|
-
"\n",
|
55
|
-
"# Step 2: Get all possible bigrams\n",
|
56
|
-
"def get_bigrams(text):\n",
|
57
|
-
" words = text.split()\n",
|
58
|
-
" bigrams = [(words[i], words[i + 1]) for i in range(len(words) - 1)]\n",
|
59
|
-
" return bigrams\n",
|
60
|
-
"\n",
|
61
|
-
"# Step 3: Apply Laplace (add-one) smoothing\n",
|
62
|
-
"def laplace_smoothing(bigrams, vocabulary):\n",
|
63
|
-
" count_bigram = defaultdict(int)\n",
|
64
|
-
" count_unigram = defaultdict(int)\n",
|
65
|
-
"\n",
|
66
|
-
" for bigram in bigrams:\n",
|
67
|
-
" count_bigram[bigram] += 1\n",
|
68
|
-
" count_unigram[bigram[0]] += 1\n",
|
69
|
-
"\n",
|
70
|
-
" def laplace_probability(bigram):\n",
|
71
|
-
" return (count_bigram[bigram] + 1) / (count_unigram[bigram[0]] + vocabulary)\n",
|
72
|
-
"\n",
|
73
|
-
" return laplace_probability\n",
|
74
|
-
"\n",
|
75
|
-
"# Step 4: Apply Lidstone (add-delta) smoothing with Jeffrey's smoothing\n",
|
76
|
-
"def lidstone_smoothing(bigrams, vocabulary, delta):\n",
|
77
|
-
" count_bigram = defaultdict(int)\n",
|
78
|
-
" count_unigram = defaultdict(int)\n",
|
79
|
-
"\n",
|
80
|
-
" for bigram in bigrams:\n",
|
81
|
-
" count_bigram[bigram] += 1\n",
|
82
|
-
" count_unigram[bigram[0]] += 1\n",
|
83
|
-
"\n",
|
84
|
-
" def lidstone_probability(bigram):\n",
|
85
|
-
" numerator = count_bigram[bigram] + delta\n",
|
86
|
-
" denominator = count_unigram[bigram[0]] + (vocabulary * delta)\n",
|
87
|
-
" return numerator / denominator\n",
|
88
|
-
"\n",
|
89
|
-
" return lidstone_probability\n",
|
90
|
-
"\n",
|
91
|
-
"# Step 5: Given a random bigram as input, print the probabilities using both smoothing methods\n",
|
92
|
-
"def main():\n",
|
93
|
-
" filename = 'lab6-file.txt' # Replace with the actual file path\n",
|
94
|
-
" text = read_text_from_file(filename)\n",
|
95
|
-
" bigrams = get_bigrams(text)\n",
|
96
|
-
" vocabulary = len(set(text.split())) # Size of the vocabulary\n",
|
97
|
-
"\n",
|
98
|
-
" delta = 0.5 # Adjust this value for Lidstone smoothing\n",
|
99
|
-
"\n",
|
100
|
-
" laplace_prob = laplace_smoothing(bigrams, vocabulary)\n",
|
101
|
-
" lidstone_prob = lidstone_smoothing(bigrams, vocabulary, delta)\n",
|
102
|
-
"\n",
|
103
|
-
" input_bigram = ('input', 'bigram') # Replace with your random bigram\n",
|
104
|
-
"\n",
|
105
|
-
" print(f\"Probability using Laplace (add-one) smoothing: {laplace_prob(input_bigram)}\")\n",
|
106
|
-
" print(f\"Probability using Lidstone (add-delta) with Jeffrey's smoothing: {lidstone_prob(input_bigram)}\")\n",
|
107
|
-
"\n",
|
108
|
-
"if __name__ == \"__main__\":\n",
|
109
|
-
" main()\n"
|
110
|
-
]
|
111
|
-
}
|
112
|
-
],
|
113
|
-
"metadata": {
|
114
|
-
"kernelspec": {
|
115
|
-
"display_name": "Python 3",
|
116
|
-
"language": "python",
|
117
|
-
"name": "python3"
|
118
|
-
},
|
119
|
-
"language_info": {
|
120
|
-
"codemirror_mode": {
|
121
|
-
"name": "ipython",
|
122
|
-
"version": 3
|
123
|
-
},
|
124
|
-
"file_extension": ".py",
|
125
|
-
"mimetype": "text/x-python",
|
126
|
-
"name": "python",
|
127
|
-
"nbconvert_exporter": "python",
|
128
|
-
"pygments_lexer": "ipython3",
|
129
|
-
"version": "3.8.8"
|
130
|
-
}
|
131
|
-
},
|
132
|
-
"nbformat": 4,
|
133
|
-
"nbformat_minor": 5
|
134
|
-
}
|