noshot 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. noshot/__init__.py +1 -0
  2. noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +112 -0
  3. noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +111 -0
  4. noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +134 -0
  5. noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +115 -0
  6. noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +123 -0
  7. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/2_ANOVA.csv +769 -0
  8. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +126 -0
  9. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA.ipynb +134 -0
  10. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +119 -0
  11. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Two Way ANOVA.ipynb +138 -0
  12. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/reaction_time.csv +5 -0
  13. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sample_data.csv +16 -0
  14. noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sleep_deprivation.csv +4 -0
  15. noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/3_Linear.csv +4802 -0
  16. noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +113 -0
  17. noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +118 -0
  18. noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression.ipynb +148 -0
  19. noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/house_rate.csv +22 -0
  20. noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +128 -0
  21. noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression.ipynb +145 -0
  22. noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/default.csv +1001 -0
  23. noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/hours_scores_records.csv +101 -0
  24. noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +256 -0
  25. noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +157 -0
  26. noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +178 -0
  27. noshot/data/AIDS CN NLP/AIDS/3. Genetic Algorithm/Genetic.ipynb +95 -0
  28. noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +74 -0
  29. noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +103 -0
  30. noshot/data/AIDS CN NLP/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +182 -0
  31. noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +120 -0
  32. noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +125 -0
  33. noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/Random Sampling.ipynb +73 -0
  34. noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/height_weight_bmi.csv +8389 -0
  35. noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test Hash Function.ipynb +141 -0
  36. noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test.ipynb +151 -0
  37. noshot/data/AIDS CN NLP/AIDS/8. Z Test/height_weight_bmi.csv +8389 -0
  38. noshot/data/AIDS CN NLP/AIDS/9. T Test/1_heart.csv +304 -0
  39. noshot/data/AIDS CN NLP/AIDS/9. T Test/Independent T Test.ipynb +119 -0
  40. noshot/data/AIDS CN NLP/AIDS/9. T Test/Paired T Test.ipynb +118 -0
  41. noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test Hash Function.ipynb +142 -0
  42. noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test.ipynb +158 -0
  43. noshot/data/AIDS CN NLP/AIDS/9. T Test/height_weight_bmi.csv +8389 -0
  44. noshot/data/AIDS CN NLP/AIDS/9. T Test/iq_test.csv +0 -0
  45. noshot/data/AIDS CN NLP/AIDS/Others (AllinOne)/All In One.ipynb +4581 -0
  46. noshot/data/AIDS CN NLP/CN/1. Chat Application/chat.java +81 -0
  47. noshot/data/AIDS CN NLP/CN/1. Chat Application/output.png +0 -0
  48. noshot/data/AIDS CN NLP/CN/1. Chat Application/procedure.png +0 -0
  49. noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +65 -0
  50. noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +44 -0
  51. noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
  52. noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
  53. noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +229 -0
  54. noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
  55. noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
  56. noshot/data/AIDS CN NLP/CN/2. File Transfer/file_to_send.txt +2 -0
  57. noshot/data/AIDS CN NLP/CN/2. File Transfer/filetransfer.java +119 -0
  58. noshot/data/AIDS CN NLP/CN/2. File Transfer/output.png +0 -0
  59. noshot/data/AIDS CN NLP/CN/2. File Transfer/procedure.png +0 -0
  60. noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
  61. noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
  62. noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/rmi.java +56 -0
  63. noshot/data/AIDS CN NLP/CN/4. Wired Network/output.png +0 -0
  64. noshot/data/AIDS CN NLP/CN/4. Wired Network/procedure.png +0 -0
  65. noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.awk +25 -0
  66. noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.tcl +81 -0
  67. noshot/data/AIDS CN NLP/CN/5. Wireless Network/output.png +0 -0
  68. noshot/data/AIDS CN NLP/CN/5. Wireless Network/procedure.png +0 -0
  69. noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.awk +27 -0
  70. noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.tcl +153 -0
  71. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +27 -0
  72. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
  73. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +86 -0
  74. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +86 -0
  75. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +28 -0
  76. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
  77. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +78 -0
  78. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +79 -0
  79. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +27 -0
  80. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +163 -0
  81. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
  82. noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
  83. noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/DV.tcl +111 -0
  84. noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/LS.tcl +106 -0
  85. noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/analysis.awk +36 -0
  86. noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/output.png +0 -0
  87. noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
  88. noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/analysis.awk +20 -0
  89. noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/broadcast.tcl +76 -0
  90. noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/multicast.tcl +103 -0
  91. noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/output.png +0 -0
  92. noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
  93. noshot/data/AIDS CN NLP/CN/9. DHCP/DHCP.java +125 -0
  94. noshot/data/AIDS CN NLP/CN/9. DHCP/output.png +0 -0
  95. noshot/data/AIDS CN NLP/CN/9. DHCP/procedure.png +0 -0
  96. noshot/data/AIDS CN NLP/NLP/NLP 1/1-Prereqs.py +18 -0
  97. noshot/data/AIDS CN NLP/NLP/NLP 1/2-Chi2test.py +83 -0
  98. noshot/data/AIDS CN NLP/NLP/NLP 1/2-T-test.py +79 -0
  99. noshot/data/AIDS CN NLP/NLP/NLP 1/3-WSD-nb.py +53 -0
  100. noshot/data/AIDS CN NLP/NLP/NLP 1/4-Hindle-Rooth.py +53 -0
  101. noshot/data/AIDS CN NLP/NLP/NLP 1/5-HMM-Trellis.py +82 -0
  102. noshot/data/AIDS CN NLP/NLP/NLP 1/6-HMM-Viterbi.py +16 -0
  103. noshot/data/AIDS CN NLP/NLP/NLP 1/7-PCFG-parsetree.py +15 -0
  104. noshot/data/AIDS CN NLP/NLP/NLP 1/Chi2test.ipynb +285 -0
  105. noshot/data/AIDS CN NLP/NLP/NLP 1/Hindle-Rooth.ipynb +179 -0
  106. noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +1461 -0
  107. noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 11 NMT.ipynb +2307 -0
  108. noshot/data/AIDS CN NLP/NLP/NLP 1/PCFG.ipynb +134 -0
  109. noshot/data/AIDS CN NLP/NLP/NLP 1/Prereqs.ipynb +131 -0
  110. noshot/data/AIDS CN NLP/NLP/NLP 1/T test.ipynb +252 -0
  111. noshot/data/AIDS CN NLP/NLP/NLP 1/TFIDF BOW.ipynb +171 -0
  112. noshot/data/AIDS CN NLP/NLP/NLP 1/Trellis.ipynb +244 -0
  113. noshot/data/AIDS CN NLP/NLP/NLP 1/WSD.ipynb +645 -0
  114. noshot/data/AIDS CN NLP/NLP/NLP 1/Word2Vec.ipynb +93 -0
  115. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +370 -0
  116. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +6 -0
  117. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +274 -0
  118. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +905 -0
  119. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/test.txt +1 -0
  120. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +272 -0
  121. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +332 -0
  122. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +549 -0
  123. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +1 -0
  124. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +817 -0
  125. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +332 -0
  126. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +231 -0
  127. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +507 -0
  128. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +134 -0
  129. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +255 -0
  130. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +159 -0
  131. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +282 -0
  132. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +670 -0
  133. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +613 -0
  134. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +74 -0
  135. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +480 -0
  136. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +445 -0
  137. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +105 -0
  138. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +87 -0
  139. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +11 -0
  140. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +83 -0
  141. noshot/data/AIDS CN NLP/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +201 -0
  142. noshot/data/AIDS CN NLP/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +185 -0
  143. noshot/data/AIDS CN NLP/NLP/NLP 3/Backward-Procedure.ipynb +597 -0
  144. noshot/data/AIDS CN NLP/NLP/NLP 3/Bag_of.ipynb +1422 -0
  145. noshot/data/AIDS CN NLP/NLP/NLP 3/CYK-algorithm.ipynb +1067 -0
  146. noshot/data/AIDS CN NLP/NLP/NLP 3/Forward-Procedure.ipynb +477 -0
  147. noshot/data/AIDS CN NLP/NLP/NLP 3/LSTM.ipynb +1290 -0
  148. noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +1461 -0
  149. noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 11 NMT.ipynb +2307 -0
  150. noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-4.ipynb +216 -0
  151. noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-5.ipynb +216 -0
  152. noshot/data/AIDS CN NLP/NLP/NLP 3/abc.txt +6 -0
  153. noshot/data/AIDS CN NLP/NLP/NLP 3/ex-1-nltk.ipynb +711 -0
  154. noshot/data/AIDS CN NLP/NLP/NLP 3/ex-2-nlp.ipynb +267 -0
  155. noshot/data/AIDS CN NLP/NLP/NLP 3/exp8&9.ipynb +305 -0
  156. noshot/data/AIDS CN NLP/NLP/NLP 3/hind.ipynb +287 -0
  157. noshot/data/AIDS CN NLP/NLP/NLP 3/lab66.ipynb +752 -0
  158. noshot/data/AIDS CN NLP/NLP/NLP 3/leb_3.ipynb +612 -0
  159. noshot/data/AIDS CN NLP/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
  160. noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_1.ipynb +3008 -0
  161. noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_2.ipynb +3095 -0
  162. noshot/data/AIDS CN NLP/NLP/NLP 3/nlplab-9.ipynb +295 -0
  163. noshot/data/AIDS CN NLP/NLP/NLP 3/nltk-ex-4.ipynb +506 -0
  164. noshot/data/AIDS CN NLP/NLP/NLP 3/text1.txt +48 -0
  165. noshot/data/AIDS CN NLP/NLP/NLP 3/text2.txt +8 -0
  166. noshot/data/AIDS CN NLP/NLP/NLP 3/text3.txt +48 -0
  167. noshot/data/AIDS CN NLP/NLP/NLP 3/translation-rnn.ipynb +812 -0
  168. noshot/data/AIDS CN NLP/NLP/NLP 3/word2vector.ipynb +173 -0
  169. noshot/data/AIDS CN NLP/NLP/NLP 4/Backward Procedure Algorithm.ipynb +179 -0
  170. noshot/data/AIDS CN NLP/NLP/NLP 4/Chi Square Collocation.ipynb +208 -0
  171. noshot/data/AIDS CN NLP/NLP/NLP 4/Collocation (T test).ipynb +188 -0
  172. noshot/data/AIDS CN NLP/NLP/NLP 4/Experiment 1.ipynb +437 -0
  173. noshot/data/AIDS CN NLP/NLP/NLP 4/Forward Procedure Algorithm.ipynb +132 -0
  174. noshot/data/AIDS CN NLP/NLP/NLP 4/Hindle Rooth.ipynb +414 -0
  175. noshot/data/AIDS CN NLP/NLP/NLP 4/MachineTranslation.ipynb +368 -0
  176. noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +86 -0
  177. noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +112 -0
  178. noshot/data/AIDS CN NLP/NLP/NLP 4/PCFG Inside Probability.ipynb +451 -0
  179. noshot/data/AIDS CN NLP/NLP/NLP 4/Text Generation using LSTM.ipynb +297 -0
  180. noshot/data/AIDS CN NLP/NLP/NLP 4/Viterbi.ipynb +310 -0
  181. noshot/data/AIDS CN NLP/NLP/NLP 4/Word Sense Disambiguation.ipynb +335 -0
  182. noshot/data/AIDS CN NLP/NLP/NLP 5/10.Text Generation using LSTM.ipynb +316 -0
  183. noshot/data/AIDS CN NLP/NLP/NLP 5/11.Machine Translation.ipynb +868 -0
  184. noshot/data/AIDS CN NLP/NLP/NLP 5/2.T and Chi2 Test.ipynb +204 -0
  185. noshot/data/AIDS CN NLP/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +234 -0
  186. noshot/data/AIDS CN NLP/NLP/NLP 5/4.Hinddle and Rooth.ipynb +128 -0
  187. noshot/data/AIDS CN NLP/NLP/NLP 5/5.Forward and Backward.ipynb +149 -0
  188. noshot/data/AIDS CN NLP/NLP/NLP 5/6.Viterbi.ipynb +111 -0
  189. noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG Parse Tree.ipynb +134 -0
  190. noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG using cyk.ipynb +101 -0
  191. noshot/data/AIDS CN NLP/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +310 -0
  192. noshot/data/AIDS CN NLP/NLP/NLP 5/9.Word2Vector.ipynb +78 -0
  193. noshot/data/AIDS CN NLP/NLP/NLP 5/NLP ALL In One.ipynb +2619 -0
  194. noshot/data/AIDS CN NLP/NLP/NLP 5/sample1.txt +15 -0
  195. noshot/data/AIDS CN NLP/NLP/NLP 5/sample2.txt +4 -0
  196. noshot/data/AIDS CN NLP/NLP/NLP 5/word2vec_model.bin +0 -0
  197. noshot/data/AIDS CN NLP/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +312 -0
  198. noshot/data/AIDS CN NLP/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +185 -0
  199. noshot/data/AIDS CN NLP/NLP/NLP 6/3. Naive Bayes WSD.ipynb +199 -0
  200. noshot/data/AIDS CN NLP/NLP/NLP 6/4. Hinddle and Rooth.ipynb +151 -0
  201. noshot/data/AIDS CN NLP/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +164 -0
  202. noshot/data/AIDS CN NLP/NLP/NLP 6/7. PCFG using CYK.ipynb +383 -0
  203. noshot/data/AIDS CN NLP/NLP/NLP 6/8. BOW and TF-IDF.ipynb +252 -0
  204. noshot/data/AIDS CN NLP/Ubuntu CN Lab.iso +0 -0
  205. noshot/main.py +47 -0
  206. noshot-0.1.0.dist-info/LICENSE.txt +21 -0
  207. noshot-0.1.0.dist-info/METADATA +65 -0
  208. noshot-0.1.0.dist-info/RECORD +210 -0
  209. noshot-0.1.0.dist-info/WHEEL +5 -0
  210. noshot-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,612 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "514c7e1f-e91a-4b98-8474-8d5578ccba97",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stderr",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "[nltk_data] Downloading package punkt to\n",
14
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
15
+ "[nltk_data] Package punkt is already up-to-date!\n",
16
+ "[nltk_data] Downloading package stopwords to\n",
17
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
18
+ "[nltk_data] Package stopwords is already up-to-date!\n",
19
+ "[nltk_data] Downloading package wordnet to\n",
20
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
21
+ "[nltk_data] Package wordnet is already up-to-date!\n"
22
+ ]
23
+ },
24
+ {
25
+ "name": "stdout",
26
+ "output_type": "stream",
27
+ "text": [
28
+ "Accuracy: 1.00\n",
29
+ "Most Informative Features\n",
30
+ " beautiful = None financ : river = 1.7 : 1.0\n",
31
+ " cash = None river : financ = 1.7 : 1.0\n",
32
+ " deposit = None river : financ = 1.7 : 1.0\n",
33
+ " he = None river : financ = 1.7 : 1.0\n",
34
+ " money = None river : financ = 1.7 : 1.0\n",
35
+ " sat = None financ : river = 1.7 : 1.0\n",
36
+ " the = None financ : river = 1.7 : 1.0\n",
37
+ " went = None river : financ = 1.7 : 1.0\n",
38
+ " withdrew = None river : financ = 1.7 : 1.0\n",
39
+ " bank = True financ : river = 1.0 : 1.0\n",
40
+ "The predicted sense for 'He likes to fish by the bank' is 'finance'\n"
41
+ ]
42
+ }
43
+ ],
44
+ "source": [
45
+ "import nltk\n",
46
+ "from nltk.classify import NaiveBayesClassifier\n",
47
+ "from nltk.corpus import stopwords\n",
48
+ "from nltk import word_tokenize, WordNetLemmatizer\n",
49
+ "from nltk.classify.util import accuracy\n",
50
+ "import random\n",
51
+ "\n",
52
+ "# Download necessary NLTK data files\n",
53
+ "nltk.download('punkt')\n",
54
+ "nltk.download('stopwords')\n",
55
+ "nltk.download('wordnet')\n",
56
+ "\n",
57
+ "# Initialize WordNetLemmatizer\n",
58
+ "lemmatizer = WordNetLemmatizer()\n",
59
+ "\n",
60
+ "# Sample training data with contexts and senses\n",
61
+ "data = [\n",
62
+ " (\"The bank of the river was beautiful\", \"river\"),\n",
63
+ " (\"He went to the bank to deposit money\", \"finance\"),\n",
64
+ " (\"She sat on the river bank\", \"river\"),\n",
65
+ " (\"He is working at the financial bank\", \"finance\"),\n",
66
+ " (\"The boat was near the river bank\", \"river\"),\n",
67
+ " (\"She withdrew cash from the bank\", \"finance\")\n",
68
+ "]\n",
69
+ "\n",
70
+ "# Preprocessing function to extract features\n",
71
+ "def extract_features(sentence):\n",
72
+ " stop_words = set(stopwords.words('english'))\n",
73
+ " words = word_tokenize(sentence)\n",
74
+ " words = [lemmatizer.lemmatize(word.lower()) for word in words if word.isalpha() and word not in stop_words]\n",
75
+ " return {word: True for word in words}\n",
76
+ "\n",
77
+ "# Create feature sets for training\n",
78
+ "feature_sets = [(extract_features(context), sense) for (context, sense) in data]\n",
79
+ "\n",
80
+ "# Shuffle and split the data into training and test sets\n",
81
+ "random.shuffle(feature_sets)\n",
82
+ "train_set, test_set = feature_sets[:4], feature_sets[4:]\n",
83
+ "\n",
84
+ "# Train the Naïve Bayes classifier\n",
85
+ "classifier = NaiveBayesClassifier.train(train_set)\n",
86
+ "\n",
87
+ "# Evaluate the classifier\n",
88
+ "print(f'Accuracy: {accuracy(classifier, test_set):.2f}')\n",
89
+ "classifier.show_most_informative_features()\n",
90
+ "\n",
91
+ "# Sample prediction\n",
92
+ "new_context = \"He likes to fish by the bank\"\n",
93
+ "features = extract_features(new_context)\n",
94
+ "predicted_sense = classifier.classify(features)\n",
95
+ "print(f\"The predicted sense for '{new_context}' is '{predicted_sense}'\")\n"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": 3,
101
+ "id": "6fd6f22f-4ca9-48a7-a783-bbd49af2eaa2",
102
+ "metadata": {},
103
+ "outputs": [
104
+ {
105
+ "name": "stdout",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "Accuracy: 1.00\n",
109
+ "Most Informative Features\n",
110
+ " he = None river : financ = 1.4 : 1.0\n",
111
+ " she = None river : financ = 1.3 : 1.0\n",
112
+ " account = None river : financ = 1.2 : 1.0\n",
113
+ " along = None financ : river = 1.2 : 1.0\n",
114
+ " financial = None river : financ = 1.2 : 1.0\n",
115
+ " new = None river : financ = 1.2 : 1.0\n",
116
+ " picnic = None financ : river = 1.2 : 1.0\n",
117
+ " service = None river : financ = 1.2 : 1.0\n",
118
+ " beauty = None financ : river = 1.1 : 1.0\n",
119
+ " customer = None river : financ = 1.1 : 1.0\n",
120
+ "The predicted sense for 'He likes to fish by the bank' is 'finance'\n"
121
+ ]
122
+ },
123
+ {
124
+ "name": "stderr",
125
+ "output_type": "stream",
126
+ "text": [
127
+ "[nltk_data] Downloading package punkt to\n",
128
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
129
+ "[nltk_data] Package punkt is already up-to-date!\n",
130
+ "[nltk_data] Downloading package stopwords to\n",
131
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
132
+ "[nltk_data] Package stopwords is already up-to-date!\n",
133
+ "[nltk_data] Downloading package wordnet to\n",
134
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
135
+ "[nltk_data] Package wordnet is already up-to-date!\n"
136
+ ]
137
+ }
138
+ ],
139
+ "source": [
140
+ "import nltk\n",
141
+ "from nltk.classify import NaiveBayesClassifier\n",
142
+ "from nltk.corpus import stopwords\n",
143
+ "from nltk import word_tokenize, WordNetLemmatizer\n",
144
+ "from nltk.classify.util import accuracy\n",
145
+ "import random\n",
146
+ "\n",
147
+ "# Download necessary NLTK data files\n",
148
+ "nltk.download('punkt')\n",
149
+ "nltk.download('stopwords')\n",
150
+ "nltk.download('wordnet')\n",
151
+ "\n",
152
+ "# Initialize WordNetLemmatizer\n",
153
+ "lemmatizer = WordNetLemmatizer()\n",
154
+ "\n",
155
+ "# Preprocessing function to extract features\n",
156
+ "def extract_features(sentence):\n",
157
+ " stop_words = set(stopwords.words('english'))\n",
158
+ " words = word_tokenize(sentence)\n",
159
+ " words = [lemmatizer.lemmatize(word.lower()) for word in words if word.isalpha() and word not in stop_words]\n",
160
+ " return {word: True for word in words}\n",
161
+ "\n",
162
+ "# Read the training data from the file\n",
163
+ "training_data = []\n",
164
+ "with open(\"E://126156048/leb_3/training_set.txt\", 'r') as file:\n",
165
+ " for line in file:\n",
166
+ " context, sense = line.strip().split('\\t')\n",
167
+ " training_data.append((context, sense))\n",
168
+ "\n",
169
+ "# Create feature sets for training\n",
170
+ "feature_sets = [(extract_features(context), sense) for (context, sense) in training_data]\n",
171
+ "\n",
172
+ "# Shuffle and split the data into training and test sets\n",
173
+ "random.shuffle(feature_sets)\n",
174
+ "train_set, test_set = feature_sets, feature_sets[:100]\n",
175
+ "\n",
176
+ "# Train the Naïve Bayes classifier\n",
177
+ "classifier = NaiveBayesClassifier.train(train_set)\n",
178
+ "\n",
179
+ "# Evaluate the classifier\n",
180
+ "print(f'Accuracy: {accuracy(classifier, test_set):.2f}')\n",
181
+ "classifier.show_most_informative_features()\n",
182
+ "\n",
183
+ "# Sample prediction\n",
184
+ "new_context = \"He likes to fish by the bank\"\n",
185
+ "features = extract_features(new_context)\n",
186
+ "predicted_sense = classifier.classify(features)\n",
187
+ "print(f\"The predicted sense for '{new_context}' is '{predicted_sense}'\")\n"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 4,
193
+ "id": "69a75873-c66e-4a92-9352-1f142b08d42e",
194
+ "metadata": {},
195
+ "outputs": [
196
+ {
197
+ "name": "stdout",
198
+ "output_type": "stream",
199
+ "text": [
200
+ "Accuracy with bigrams: 1.00\n",
201
+ "Most Informative Features\n",
202
+ " the_bank = None river : financ = 1.5 : 1.0\n",
203
+ " he = None river : financ = 1.4 : 1.0\n",
204
+ " account = None river : financ = 1.3 : 1.0\n",
205
+ " she = None river : financ = 1.3 : 1.0\n",
206
+ " along = None financ : river = 1.2 : 1.0\n",
207
+ " along_river = None financ : river = 1.2 : 1.0\n",
208
+ " the_river = None financ : river = 1.2 : 1.0\n",
209
+ " bank_offer = None river : financ = 1.2 : 1.0\n",
210
+ " beauty = None financ : river = 1.2 : 1.0\n",
211
+ " financial = None river : financ = 1.2 : 1.0\n",
212
+ "The predicted sense for 'He likes to fish by the bank' with bigrams is 'finance'\n"
213
+ ]
214
+ }
215
+ ],
216
+ "source": [
217
+ "from nltk import bigrams\n",
218
+ "\n",
219
+ "def extract_features_with_bigrams(sentence):\n",
220
+ " stop_words = set(stopwords.words('english'))\n",
221
+ " words = word_tokenize(sentence)\n",
222
+ " words = [lemmatizer.lemmatize(word.lower()) for word in words if word.isalpha() and word not in stop_words]\n",
223
+ " word_features = {word: True for word in words}\n",
224
+ " bigram_features = {f\"{bigram[0]}_{bigram[1]}\": True for bigram in bigrams(words)}\n",
225
+ " return {**word_features, **bigram_features}\n",
226
+ "\n",
227
+ "# Create feature sets with bigrams\n",
228
+ "feature_sets_with_bigrams = [(extract_features_with_bigrams(context), sense) for (context, sense) in training_data]\n",
229
+ "\n",
230
+ "# Shuffle and split the data into training and test sets\n",
231
+ "random.shuffle(feature_sets_with_bigrams)\n",
232
+ "train_set, test_set = feature_sets_with_bigrams[:40], feature_sets_with_bigrams[40:]\n",
233
+ "\n",
234
+ "# Train the Naïve Bayes classifier with bigrams\n",
235
+ "classifier_with_bigrams = NaiveBayesClassifier.train(train_set)\n",
236
+ "\n",
237
+ "# Evaluate the classifier\n",
238
+ "print(f'Accuracy with bigrams: {accuracy(classifier_with_bigrams, test_set):.2f}')\n",
239
+ "classifier_with_bigrams.show_most_informative_features()\n",
240
+ "\n",
241
+ "# Sample prediction\n",
242
+ "features_with_bigrams = extract_features_with_bigrams(new_context)\n",
243
+ "predicted_sense_with_bigrams = classifier_with_bigrams.classify(features_with_bigrams)\n",
244
+ "print(f\"The predicted sense for '{new_context}' with bigrams is '{predicted_sense_with_bigrams}'\")\n"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": 5,
250
+ "id": "2645e42f-4b96-41d9-919c-02945700c2e8",
251
+ "metadata": {},
252
+ "outputs": [
253
+ {
254
+ "name": "stdout",
255
+ "output_type": "stream",
256
+ "text": [
257
+ "Accuracy with POS: 1.00\n",
258
+ "Most Informative Features\n",
259
+ " river = None financ : river = 5.3 : 1.0\n",
260
+ " river_bank = None financ : river = 5.3 : 1.0\n",
261
+ " account = None river : financ = 1.3 : 1.0\n",
262
+ " bank_provided = True river : financ = 1.2 : 1.0\n",
263
+ " financial = None river : financ = 1.2 : 1.0\n",
264
+ " new = None river : financ = 1.2 : 1.0\n",
265
+ " provided = True river : financ = 1.2 : 1.0\n",
266
+ " service = None river : financ = 1.2 : 1.0\n",
267
+ " beauty = None financ : river = 1.2 : 1.0\n",
268
+ " enjoyed = None financ : river = 1.2 : 1.0\n",
269
+ "The predicted sense for 'He likes to fish by the bank' with POS is 'finance'\n"
270
+ ]
271
+ }
272
+ ],
273
+ "source": [
274
+ "from nltk import pos_tag\n",
275
+ "\n",
276
+ "def extract_features_with_pos(sentence):\n",
277
+ " stop_words = set(stopwords.words('english'))\n",
278
+ " words = word_tokenize(sentence)\n",
279
+ " words = [lemmatizer.lemmatize(word.lower()) for word in words if word.isalpha() and word not in stop_words]\n",
280
+ " pos_tags = pos_tag(words)\n",
281
+ " \n",
282
+ " # Consider only nouns, verbs, and adjectives for feature extraction\n",
283
+ " relevant_words = [word for word, pos in pos_tags if pos.startswith('N') or pos.startswith('V') or pos.startswith('J')]\n",
284
+ " \n",
285
+ " word_features = {word: True for word in relevant_words}\n",
286
+ " bigram_features = {f\"{bigram[0]}_{bigram[1]}\": True for bigram in bigrams(relevant_words)}\n",
287
+ " \n",
288
+ " return {**word_features, **bigram_features}\n",
289
+ "\n",
290
+ "# Create feature sets with POS\n",
291
+ "feature_sets_with_pos = [(extract_features_with_pos(context), sense) for (context, sense) in training_data]\n",
292
+ "\n",
293
+ "# Shuffle and split the data into training and test sets\n",
294
+ "random.shuffle(feature_sets_with_pos)\n",
295
+ "train_set, test_set = feature_sets_with_pos[:40], feature_sets_with_pos[40:]\n",
296
+ "\n",
297
+ "# Train the Naïve Bayes classifier with POS features\n",
298
+ "classifier_with_pos = NaiveBayesClassifier.train(train_set)\n",
299
+ "\n",
300
+ "# Evaluate the classifier\n",
301
+ "print(f'Accuracy with POS: {accuracy(classifier_with_pos, test_set):.2f}')\n",
302
+ "classifier_with_pos.show_most_informative_features()\n",
303
+ "\n",
304
+ "# Sample prediction\n",
305
+ "features_with_pos = extract_features_with_pos(new_context)\n",
306
+ "predicted_sense_with_pos = classifier_with_pos.classify(features_with_pos)\n",
307
+ "print(f\"The predicted sense for '{new_context}' with POS is '{predicted_sense_with_pos}'\")\n"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": 12,
313
+ "id": "eec05832-1967-48f2-8d85-a326c88a5350",
314
+ "metadata": {},
315
+ "outputs": [
316
+ {
317
+ "name": "stdout",
318
+ "output_type": "stream",
319
+ "text": [
320
+ "Accuracy with POS and contextual features: 1.00\n",
321
+ "Most Informative Features\n",
322
+ " river_NN = None financ : river = 2.8 : 1.0\n",
323
+ " the_DT = True river : financ = 1.4 : 1.0\n",
324
+ " contains_loan = False river : financ = 1.3 : 1.0\n",
325
+ " loan = None river : financ = 1.3 : 1.0\n",
326
+ " loan_NN = None river : financ = 1.3 : 1.0\n",
327
+ " the_DT = None financ : river = 1.3 : 1.0\n",
328
+ " along = None financ : river = 1.3 : 1.0\n",
329
+ " along_RB = None financ : river = 1.3 : 1.0\n",
330
+ " along_river = None financ : river = 1.3 : 1.0\n",
331
+ " river_JJ = None financ : river = 1.3 : 1.0\n",
332
+ "The predicted sense for 'He likes to fish by the bank.' with POS and contextual features is 'river'\n"
333
+ ]
334
+ },
335
+ {
336
+ "name": "stderr",
337
+ "output_type": "stream",
338
+ "text": [
339
+ "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
340
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
341
+ "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n",
342
+ "[nltk_data] date!\n",
343
+ "[nltk_data] Downloading package punkt to\n",
344
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
345
+ "[nltk_data] Package punkt is already up-to-date!\n",
346
+ "[nltk_data] Downloading package wordnet to\n",
347
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
348
+ "[nltk_data] Package wordnet is already up-to-date!\n",
349
+ "[nltk_data] Downloading package stopwords to\n",
350
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
351
+ "[nltk_data] Package stopwords is already up-to-date!\n"
352
+ ]
353
+ }
354
+ ],
355
+ "source": [
356
+ "import random\n",
357
+ "from nltk import NaiveBayesClassifier, pos_tag, word_tokenize\n",
358
+ "from nltk.corpus import stopwords\n",
359
+ "from nltk.stem import WordNetLemmatizer\n",
360
+ "from nltk import bigrams\n",
361
+ "from nltk.classify import accuracy\n",
362
+ "import nltk\n",
363
+ "\n",
364
+ "nltk.download('averaged_perceptron_tagger')\n",
365
+ "nltk.download('punkt')\n",
366
+ "nltk.download('wordnet')\n",
367
+ "nltk.download('stopwords')\n",
368
+ "\n",
369
+ "# Updated and expanded training data with additional examples\n",
370
+ "expanded_training_data = [\n",
371
+ " # River sense\n",
372
+ " (\"The children played by the river bank.\", \"river\"),\n",
373
+ " (\"They set up a picnic by the river bank.\", \"river\"),\n",
374
+ " (\"We spent the afternoon walking along the river bank.\", \"river\"),\n",
375
+ " (\"He enjoys kayaking near the river bank every weekend.\", \"river\"),\n",
376
+ " (\"The river bank was bustling with people fishing.\", \"river\"),\n",
377
+ " (\"The river flooded and covered the bank with water.\", \"river\"),\n",
378
+ " (\"We followed the river bank trail through the forest.\", \"river\"),\n",
379
+ " (\"The boat was anchored by the river bank.\", \"river\"),\n",
380
+ " (\"The river bank was a perfect spot for our tent.\", \"river\"),\n",
381
+ " (\"Wildflowers grew along the river bank.\", \"river\"),\n",
382
+ " (\"The river bank had eroded after the heavy rains.\", \"river\"),\n",
383
+ " \n",
384
+ " # Finance sense\n",
385
+ " (\"I went to the bank to deposit a check.\", \"finance\"),\n",
386
+ " (\"The bank approved my loan application.\", \"finance\"),\n",
387
+ " (\"She worked as a teller at the local bank.\", \"finance\"),\n",
388
+ " (\"They offer excellent financial services at this bank.\", \"finance\"),\n",
389
+ " (\"You can open an account at any bank in town.\", \"finance\"),\n",
390
+ " (\"The bank charges high interest rates on loans.\", \"finance\"),\n",
391
+ " (\"Our local bank has a great mobile app.\", \"finance\"),\n",
392
+ " (\"He withdrew cash from the bank.\", \"finance\"),\n",
393
+ " (\"She has a meeting with the bank manager.\", \"finance\"),\n",
394
+ " (\"The bank is closed on public holidays.\", \"finance\"),\n",
395
+ " (\"They are opening a new bank branch downtown.\", \"finance\"),\n",
396
+ " (\"She visited the bank to discuss her investment portfolio.\", \"finance\"),\n",
397
+ " (\"The bank provided a financial report for the last quarter.\", \"finance\"),\n",
398
+ " (\"The bank's new policy on loans is quite strict.\", \"finance\"),\n",
399
+ " (\"He worked in a bank before starting his own business.\", \"finance\"),\n",
400
+ " (\"The bank approved a loan application yesterday.\", \"finance\"),\n",
401
+ "]\n",
402
+ "\n",
403
+ "lemmatizer = WordNetLemmatizer()\n",
404
+ "\n",
405
+ "def extract_features_with_pos(sentence):\n",
406
+ " stop_words = set(stopwords.words('english'))\n",
407
+ " words = word_tokenize(sentence)\n",
408
+ " words = [lemmatizer.lemmatize(word.lower()) for word in words if word.isalpha() and word not in stop_words]\n",
409
+ " pos_tags = pos_tag(words)\n",
410
+ " \n",
411
+ " relevant_words = [word for word, pos in pos_tags if pos.startswith('N') or pos.startswith('V') or pos.startswith('J') or pos.startswith('R')]\n",
412
+ " \n",
413
+ " word_features = {word: True for word in relevant_words}\n",
414
+ " bigram_features = {f\"{bigram[0]}_{bigram[1]}\": True for bigram in bigrams(relevant_words)}\n",
415
+ " pos_features = {f\"{word}_{pos}\": True for word, pos in pos_tags}\n",
416
+ " \n",
417
+ " # Additional contextual features\n",
418
+ " context_features = {\n",
419
+ " 'contains_fish': 'fish' in words,\n",
420
+ " 'contains_deposit': 'deposit' in words,\n",
421
+ " 'contains_loan': 'loan' in words,\n",
422
+ " 'contains_bank': 'bank' in words,\n",
423
+ " }\n",
424
+ " \n",
425
+ " return {**word_features, **bigram_features, **pos_features, **context_features}\n",
426
+ "\n",
427
+ "# Create feature sets with POS and additional features\n",
428
+ "feature_sets_with_pos = [(extract_features_with_pos(context), sense) for (context, sense) in expanded_training_data]\n",
429
+ "\n",
430
+ "# Shuffle and split the data into training and test sets\n",
431
+ "random.shuffle(feature_sets_with_pos)\n",
432
+ "train_set, test_set = feature_sets_with_pos[:24], feature_sets_with_pos[24:]\n",
433
+ "\n",
434
+ "# Train the Naïve Bayes classifier with POS and contextual features\n",
435
+ "classifier_with_pos = NaiveBayesClassifier.train(train_set)\n",
436
+ "\n",
437
+ "# Evaluate the classifier\n",
438
+ "print(f'Accuracy with POS and contextual features: {accuracy(classifier_with_pos, test_set):.2f}')\n",
439
+ "classifier_with_pos.show_most_informative_features()\n",
440
+ "\n",
441
+ "# Sample prediction\n",
442
+ "new_context = \"He likes to fish by the bank.\"\n",
443
+ "features_with_pos = extract_features_with_pos(new_context)\n",
444
+ "predicted_sense_with_pos = classifier_with_pos.classify(features_with_pos)\n",
445
+ "print(f\"The predicted sense for '{new_context}' with POS and contextual features is '{predicted_sense_with_pos}'\")"
446
+ ]
447
+ },
448
+ {
449
+ "cell_type": "code",
450
+ "execution_count": 13,
451
+ "id": "d51a4cd6-1689-4b25-b256-83e7dc29aa6c",
452
+ "metadata": {},
453
+ "outputs": [
454
+ {
455
+ "name": "stdout",
456
+ "output_type": "stream",
457
+ "text": [
458
+ "Accuracy with POS and contextual features: 1.00\n",
459
+ "Most Informative Features\n",
460
+ " river_bank = None financ : river = 6.5 : 1.0\n",
461
+ " river_NN = None financ : river = 3.9 : 1.0\n",
462
+ " the_DT = True river : financ = 1.9 : 1.0\n",
463
+ " the_DT = None financ : river = 1.9 : 1.0\n",
464
+ " contains_loan = False river : financ = 1.3 : 1.0\n",
465
+ " loan = None river : financ = 1.3 : 1.0\n",
466
+ " loan_NN = None river : financ = 1.3 : 1.0\n",
467
+ " we_PRP = None financ : river = 1.3 : 1.0\n",
468
+ " she_PRP = None river : financ = 1.2 : 1.0\n",
469
+ " afternoon = None financ : river = 1.1 : 1.0\n",
470
+ "The predicted sense for 'He likes to fish by the bank.' with POS and contextual features is 'finance'\n"
471
+ ]
472
+ },
473
+ {
474
+ "name": "stderr",
475
+ "output_type": "stream",
476
+ "text": [
477
+ "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
478
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
479
+ "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n",
480
+ "[nltk_data] date!\n",
481
+ "[nltk_data] Downloading package punkt to\n",
482
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
483
+ "[nltk_data] Package punkt is already up-to-date!\n",
484
+ "[nltk_data] Downloading package wordnet to\n",
485
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
486
+ "[nltk_data] Package wordnet is already up-to-date!\n",
487
+ "[nltk_data] Downloading package stopwords to\n",
488
+ "[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
489
+ "[nltk_data] Package stopwords is already up-to-date!\n"
490
+ ]
491
+ }
492
+ ],
493
+ "source": [
494
+ "import random\n",
495
+ "from nltk import NaiveBayesClassifier, pos_tag, word_tokenize\n",
496
+ "from nltk.corpus import stopwords\n",
497
+ "from nltk.stem import WordNetLemmatizer\n",
498
+ "from nltk import bigrams\n",
499
+ "from nltk.classify import accuracy\n",
500
+ "import nltk\n",
501
+ "\n",
502
+ "nltk.download('averaged_perceptron_tagger')\n",
503
+ "nltk.download('punkt')\n",
504
+ "nltk.download('wordnet')\n",
505
+ "nltk.download('stopwords')\n",
506
+ "\n",
507
+ "# Load training data from a text file\n",
508
+ "def load_training_data(file_path):\n",
509
+ " with open(file_path, 'r') as file:\n",
510
+ " lines = file.readlines()\n",
511
+ " data = [(line.rsplit(' ', 1)[0], line.rsplit(' ', 1)[1].strip()) for line in lines]\n",
512
+ " return data\n",
513
+ "\n",
514
+ "# Example file path (adjust as needed)\n",
515
+ "file_path = 'E://126156048/leb_3/training_set.txt'\n",
516
+ "training_data = load_training_data(file_path)\n",
517
+ "\n",
518
+ "lemmatizer = WordNetLemmatizer()\n",
519
+ "\n",
520
+ "def extract_features_with_pos(sentence):\n",
521
+ " stop_words = set(stopwords.words('english'))\n",
522
+ " words = word_tokenize(sentence)\n",
523
+ " words = [lemmatizer.lemmatize(word.lower()) for word in words if word.isalpha() and word not in stop_words]\n",
524
+ " pos_tags = pos_tag(words)\n",
525
+ " \n",
526
+ " relevant_words = [word for word, pos in pos_tags if pos.startswith('N') or pos.startswith('V') or pos.startswith('J') or pos.startswith('R')]\n",
527
+ " \n",
528
+ " word_features = {word: True for word in relevant_words}\n",
529
+ " bigram_features = {f\"{bigram[0]}_{bigram[1]}\": True for bigram in bigrams(relevant_words)}\n",
530
+ " pos_features = {f\"{word}_{pos}\": True for word, pos in pos_tags}\n",
531
+ " \n",
532
+ " # Additional contextual features\n",
533
+ " context_features = {\n",
534
+ " 'contains_fish': 'fish' in words,\n",
535
+ " 'contains_deposit': 'deposit' in words,\n",
536
+ " 'contains_loan': 'loan' in words,\n",
537
+ " 'contains_bank': 'bank' in words,\n",
538
+ " }\n",
539
+ " \n",
540
+ " return {**word_features, **bigram_features, **pos_features, **context_features}\n",
541
+ "\n",
542
+ "# Create feature sets with POS and additional features\n",
543
+ "feature_sets_with_pos = [(extract_features_with_pos(context), sense) for (context, sense) in training_data]\n",
544
+ "\n",
545
+ "# Shuffle and split the data into training and test sets\n",
546
+ "random.shuffle(feature_sets_with_pos)\n",
547
+ "train_set, test_set = feature_sets_with_pos[:24], feature_sets_with_pos[24:]\n",
548
+ "\n",
549
+ "# Train the Naïve Bayes classifier with POS and contextual features\n",
550
+ "classifier_with_pos = NaiveBayesClassifier.train(train_set)\n",
551
+ "\n",
552
+ "# Evaluate the classifier\n",
553
+ "print(f'Accuracy with POS and contextual features: {accuracy(classifier_with_pos, test_set):.2f}')\n",
554
+ "classifier_with_pos.show_most_informative_features()\n",
555
+ "\n",
556
+ "# Sample prediction\n",
557
+ "new_context = \"He likes to fish by the bank.\"\n",
558
+ "features_with_pos = extract_features_with_pos(new_context)\n",
559
+ "predicted_sense_with_pos = classifier_with_pos.classify(features_with_pos)\n",
560
+ "print(f\"The predicted sense for '{new_context}' with POS and contextual features is '{predicted_sense_with_pos}'\")"
561
+ ]
562
+ },
563
+ {
564
+ "cell_type": "code",
565
+ "execution_count": 18,
566
+ "id": "c0316930-0a0d-4b99-9d4c-9857e8c30565",
567
+ "metadata": {},
568
+ "outputs": [
569
+ {
570
+ "name": "stdout",
571
+ "output_type": "stream",
572
+ "text": [
573
+ "[('The', 'children played by the river bank. river'), ('They', 'set up a picnic by the river bank. river'), ('We', 'spent the afternoon walking along the river bank. river'), ('He', 'enjoys kayaking near the river bank every weekend. river'), ('The', 'river bank was bustling with people fishing. river'), ('The', 'river flooded and covered the bank with water. river'), ('We', 'followed the river bank trail through the forest. river'), ('The', 'boat was anchored by the river bank. river'), ('The', 'river bank was a perfect spot for our tent. river'), ('Wildflowers', 'grew along the river bank. river'), ('The', 'river bank had eroded after the heavy rains. river'), ('I', 'went to the bank to deposit a check. finance'), ('The', 'bank approved my loan application. finance'), ('She', 'worked as a teller at the local bank. finance'), ('They', 'offer excellent financial services at this bank. finance'), ('You', 'can open an account at any bank in town. finance'), ('The', 'bank charges high interest rates on loans. finance'), ('Our', 'local bank has a great mobile app. finance'), ('He', 'withdrew cash from the bank. finance'), ('She', 'has a meeting with the bank manager. finance'), ('The', 'bank is closed on public holidays. finance'), ('They', 'are opening a new bank branch downtown. finance'), ('She', 'visited the bank to discuss her investment portfolio. finance'), ('The', 'bank provided a financial report for the last quarter. finance'), ('The', \"bank's new policy on loans is quite strict. finance\"), ('He', 'worked in a bank before starting his own business. finance'), ('The', 'bank approved a loan application yesterday. finance')]\n"
574
+ ]
575
+ }
576
+ ],
577
+ "source": [
578
+ "# Define the file path\n",
579
+ "file_path = 'E://126156048/leb_3/training_set.txt'\n",
580
+ "\n",
581
+ "# Open the file and read lines\n",
582
+ "with open(file_path, 'r') as file:\n",
583
+ " # Create a list of tuples from each line\n",
584
+ " data = [tuple(line.strip().split(' ', 1)) for line in file]\n",
585
+ "\n",
586
+ "# Print the result\n",
587
+ "print(data)"
588
+ ]
589
+ }
590
+ ],
591
+ "metadata": {
592
+ "kernelspec": {
593
+ "display_name": "Python 3 (ipykernel)",
594
+ "language": "python",
595
+ "name": "python3"
596
+ },
597
+ "language_info": {
598
+ "codemirror_mode": {
599
+ "name": "ipython",
600
+ "version": 3
601
+ },
602
+ "file_extension": ".py",
603
+ "mimetype": "text/x-python",
604
+ "name": "python",
605
+ "nbconvert_exporter": "python",
606
+ "pygments_lexer": "ipython3",
607
+ "version": "3.11.1"
608
+ }
609
+ },
610
+ "nbformat": 4,
611
+ "nbformat_minor": 5
612
+ }