noshot 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. noshot/data/ML TS XAI/ML/1. PCA - EDA/PCA-EDA.ipynb +207 -0
  2. noshot/data/ML TS XAI/ML/1. PCA - EDA/balance-scale.csv +626 -0
  3. noshot/data/ML TS XAI/ML/1. PCA - EDA/input.txt +625 -0
  4. noshot/data/ML TS XAI/ML/2. KNN Classifier/KNN.ipynb +287 -0
  5. noshot/data/ML TS XAI/ML/2. KNN Classifier/balance-scale.csv +626 -0
  6. noshot/data/ML TS XAI/ML/2. KNN Classifier/input.txt +625 -0
  7. noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/LDA.ipynb +83 -0
  8. noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/balance-scale.csv +626 -0
  9. noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis/input.txt +625 -0
  10. noshot/data/ML TS XAI/ML/4. Linear Regression/Linear-Regression.ipynb +117 -0
  11. noshot/data/ML TS XAI/ML/4. Linear Regression/machine-data.csv +210 -0
  12. noshot/data/ML TS XAI/ML/5. Logistic Regression/Logistic-Regression.ipynb +137 -0
  13. noshot/data/ML TS XAI/ML/5. Logistic Regression/wine-dataset.csv +179 -0
  14. noshot/data/ML TS XAI/ML/6. Bayesian Classifier/Bayesian.ipynb +129 -0
  15. noshot/data/ML TS XAI/ML/6. Bayesian Classifier/wine-dataset.csv +179 -0
  16. noshot/data/ML TS XAI/TS/1. EDA - Handling Time Series Data/Handling TS Data.ipynb +784 -0
  17. noshot/data/ML TS XAI/TS/1. EDA - Handling Time Series Data/raw_sales.csv +29581 -0
  18. noshot/data/ML TS XAI/TS/2. Feature Engineering/Feature Engineering-.ipynb +1445 -0
  19. noshot/data/ML TS XAI/TS/3. Temporal Relationships/Exploring Temporal Relationships.ipynb +603 -0
  20. noshot/data/ML TS XAI/TS/4. Up-Down-Sampling and Interploation/Up-Down-Sampling.ipynb +721 -0
  21. noshot/data/ML TS XAI/TS/4. Up-Down-Sampling and Interploation/shampoo_sales.csv +37 -0
  22. noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality/Stationarity-Trend-Seasonality.ipynb +392 -0
  23. noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality/daily-min-temperatures.csv +3651 -0
  24. noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality/daily-total-female-births.csv +366 -0
  25. noshot/data/ML TS XAI/TS/6. Autocorrelation - Partial Autocorrelation/ACF-PACF.ipynb +175 -0
  26. noshot/data/ML TS XAI/TS/6. Autocorrelation - Partial Autocorrelation/daily-min-temperatures.csv +3651 -0
  27. {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/METADATA +2 -2
  28. noshot-0.1.9.dist-info/RECORD +35 -0
  29. noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +0 -112
  30. noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +0 -111
  31. noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +0 -134
  32. noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +0 -115
  33. noshot/data/ML TS XAI/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +0 -123
  34. noshot/data/ML TS XAI/AIDS/10. ANOVA/2_ANOVA.csv +0 -769
  35. noshot/data/ML TS XAI/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +0 -126
  36. noshot/data/ML TS XAI/AIDS/10. ANOVA/One Way ANOVA.ipynb +0 -134
  37. noshot/data/ML TS XAI/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +0 -119
  38. noshot/data/ML TS XAI/AIDS/10. ANOVA/Two Way ANOVA.ipynb +0 -138
  39. noshot/data/ML TS XAI/AIDS/10. ANOVA/reaction_time.csv +0 -5
  40. noshot/data/ML TS XAI/AIDS/10. ANOVA/sample_data.csv +0 -16
  41. noshot/data/ML TS XAI/AIDS/10. ANOVA/sleep_deprivation.csv +0 -4
  42. noshot/data/ML TS XAI/AIDS/11. Linear Regression/3_Linear.csv +0 -4802
  43. noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +0 -113
  44. noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +0 -118
  45. noshot/data/ML TS XAI/AIDS/11. Linear Regression/Linear Regression.ipynb +0 -148
  46. noshot/data/ML TS XAI/AIDS/11. Linear Regression/house_rate.csv +0 -22
  47. noshot/data/ML TS XAI/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +0 -128
  48. noshot/data/ML TS XAI/AIDS/12. Logistic Regression/Logistic Regression.ipynb +0 -145
  49. noshot/data/ML TS XAI/AIDS/12. Logistic Regression/default.csv +0 -1001
  50. noshot/data/ML TS XAI/AIDS/12. Logistic Regression/hours_scores_records.csv +0 -101
  51. noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +0 -256
  52. noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +0 -157
  53. noshot/data/ML TS XAI/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +0 -178
  54. noshot/data/ML TS XAI/AIDS/3. Genetic Algorithm/Genetic.ipynb +0 -95
  55. noshot/data/ML TS XAI/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +0 -74
  56. noshot/data/ML TS XAI/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +0 -103
  57. noshot/data/ML TS XAI/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +0 -182
  58. noshot/data/ML TS XAI/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +0 -120
  59. noshot/data/ML TS XAI/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +0 -125
  60. noshot/data/ML TS XAI/AIDS/7. Random Sampling/Random Sampling.ipynb +0 -73
  61. noshot/data/ML TS XAI/AIDS/7. Random Sampling/height_weight_bmi.csv +0 -8389
  62. noshot/data/ML TS XAI/AIDS/8. Z Test/Z Test Hash Function.ipynb +0 -141
  63. noshot/data/ML TS XAI/AIDS/8. Z Test/Z Test.ipynb +0 -151
  64. noshot/data/ML TS XAI/AIDS/8. Z Test/height_weight_bmi.csv +0 -8389
  65. noshot/data/ML TS XAI/AIDS/9. T Test/1_heart.csv +0 -304
  66. noshot/data/ML TS XAI/AIDS/9. T Test/Independent T Test.ipynb +0 -119
  67. noshot/data/ML TS XAI/AIDS/9. T Test/Paired T Test.ipynb +0 -118
  68. noshot/data/ML TS XAI/AIDS/9. T Test/T Test Hash Function.ipynb +0 -142
  69. noshot/data/ML TS XAI/AIDS/9. T Test/T Test.ipynb +0 -158
  70. noshot/data/ML TS XAI/AIDS/9. T Test/height_weight_bmi.csv +0 -8389
  71. noshot/data/ML TS XAI/AIDS/9. T Test/iq_test.csv +0 -0
  72. noshot/data/ML TS XAI/AIDS/Others (AllinOne)/All In One.ipynb +0 -4581
  73. noshot/data/ML TS XAI/CN/1. Chat Application/chat.java +0 -81
  74. noshot/data/ML TS XAI/CN/1. Chat Application/output.png +0 -0
  75. noshot/data/ML TS XAI/CN/1. Chat Application/procedure.png +0 -0
  76. noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +0 -65
  77. noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +0 -44
  78. noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
  79. noshot/data/ML TS XAI/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
  80. noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +0 -229
  81. noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
  82. noshot/data/ML TS XAI/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
  83. noshot/data/ML TS XAI/CN/2. File Transfer/file_to_send.txt +0 -2
  84. noshot/data/ML TS XAI/CN/2. File Transfer/filetransfer.java +0 -119
  85. noshot/data/ML TS XAI/CN/2. File Transfer/output.png +0 -0
  86. noshot/data/ML TS XAI/CN/2. File Transfer/procedure.png +0 -0
  87. noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/Client.class +0 -0
  88. noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/MyServerImpl.class +0 -0
  89. noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/MyServerIntf.class +0 -0
  90. noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/Server.class +0 -0
  91. noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
  92. noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
  93. noshot/data/ML TS XAI/CN/3. RMI (Remote Method Invocation)/rmi.java +0 -56
  94. noshot/data/ML TS XAI/CN/4. Wired Network/output.png +0 -0
  95. noshot/data/ML TS XAI/CN/4. Wired Network/procedure.png +0 -0
  96. noshot/data/ML TS XAI/CN/4. Wired Network/wired.awk +0 -25
  97. noshot/data/ML TS XAI/CN/4. Wired Network/wired.tcl +0 -81
  98. noshot/data/ML TS XAI/CN/5. Wireless Network/output.png +0 -0
  99. noshot/data/ML TS XAI/CN/5. Wireless Network/procedure.png +0 -0
  100. noshot/data/ML TS XAI/CN/5. Wireless Network/wireless.awk +0 -27
  101. noshot/data/ML TS XAI/CN/5. Wireless Network/wireless.tcl +0 -153
  102. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +0 -27
  103. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
  104. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +0 -86
  105. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +0 -86
  106. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +0 -28
  107. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
  108. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +0 -78
  109. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +0 -79
  110. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +0 -27
  111. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +0 -163
  112. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
  113. noshot/data/ML TS XAI/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
  114. noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/DV.tcl +0 -111
  115. noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/LS.tcl +0 -106
  116. noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/analysis.awk +0 -36
  117. noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/output.png +0 -0
  118. noshot/data/ML TS XAI/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
  119. noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/analysis.awk +0 -20
  120. noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/broadcast.tcl +0 -76
  121. noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/multicast.tcl +0 -103
  122. noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/output.png +0 -0
  123. noshot/data/ML TS XAI/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
  124. noshot/data/ML TS XAI/CN/9. DHCP/DHCP.java +0 -125
  125. noshot/data/ML TS XAI/CN/9. DHCP/output.png +0 -0
  126. noshot/data/ML TS XAI/CN/9. DHCP/procedure.png +0 -0
  127. noshot/data/ML TS XAI/NLP/NLP 1/1-Prereqs.py +0 -18
  128. noshot/data/ML TS XAI/NLP/NLP 1/2-Chi2test.py +0 -83
  129. noshot/data/ML TS XAI/NLP/NLP 1/2-T-test.py +0 -79
  130. noshot/data/ML TS XAI/NLP/NLP 1/3-WSD-nb.py +0 -53
  131. noshot/data/ML TS XAI/NLP/NLP 1/4-Hindle-Rooth.py +0 -53
  132. noshot/data/ML TS XAI/NLP/NLP 1/5-HMM-Trellis.py +0 -82
  133. noshot/data/ML TS XAI/NLP/NLP 1/6-HMM-Viterbi.py +0 -16
  134. noshot/data/ML TS XAI/NLP/NLP 1/7-PCFG-parsetree.py +0 -15
  135. noshot/data/ML TS XAI/NLP/NLP 1/Chi2test.ipynb +0 -285
  136. noshot/data/ML TS XAI/NLP/NLP 1/Hindle-Rooth.ipynb +0 -179
  137. noshot/data/ML TS XAI/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +0 -1461
  138. noshot/data/ML TS XAI/NLP/NLP 1/Lab 11 NMT.ipynb +0 -2307
  139. noshot/data/ML TS XAI/NLP/NLP 1/PCFG.ipynb +0 -134
  140. noshot/data/ML TS XAI/NLP/NLP 1/Prereqs.ipynb +0 -131
  141. noshot/data/ML TS XAI/NLP/NLP 1/T test.ipynb +0 -252
  142. noshot/data/ML TS XAI/NLP/NLP 1/TFIDF BOW.ipynb +0 -171
  143. noshot/data/ML TS XAI/NLP/NLP 1/Trellis.ipynb +0 -244
  144. noshot/data/ML TS XAI/NLP/NLP 1/WSD.ipynb +0 -645
  145. noshot/data/ML TS XAI/NLP/NLP 1/Word2Vec.ipynb +0 -93
  146. noshot/data/ML TS XAI/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +0 -370
  147. noshot/data/ML TS XAI/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +0 -6
  148. noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +0 -274
  149. noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +0 -905
  150. noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/test.txt +0 -1
  151. noshot/data/ML TS XAI/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +0 -272
  152. noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +0 -332
  153. noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +0 -549
  154. noshot/data/ML TS XAI/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +0 -1
  155. noshot/data/ML TS XAI/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +0 -817
  156. noshot/data/ML TS XAI/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +0 -332
  157. noshot/data/ML TS XAI/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +0 -231
  158. noshot/data/ML TS XAI/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +0 -507
  159. noshot/data/ML TS XAI/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +0 -134
  160. noshot/data/ML TS XAI/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +0 -255
  161. noshot/data/ML TS XAI/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +0 -159
  162. noshot/data/ML TS XAI/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +0 -282
  163. noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +0 -670
  164. noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +0 -613
  165. noshot/data/ML TS XAI/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +0 -74
  166. noshot/data/ML TS XAI/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +0 -480
  167. noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +0 -445
  168. noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +0 -105
  169. noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +0 -87
  170. noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +0 -11
  171. noshot/data/ML TS XAI/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +0 -83
  172. noshot/data/ML TS XAI/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +0 -201
  173. noshot/data/ML TS XAI/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +0 -185
  174. noshot/data/ML TS XAI/NLP/NLP 3/Backward-Procedure.ipynb +0 -597
  175. noshot/data/ML TS XAI/NLP/NLP 3/Bag_of.ipynb +0 -1422
  176. noshot/data/ML TS XAI/NLP/NLP 3/CYK-algorithm.ipynb +0 -1067
  177. noshot/data/ML TS XAI/NLP/NLP 3/Forward-Procedure.ipynb +0 -477
  178. noshot/data/ML TS XAI/NLP/NLP 3/LSTM.ipynb +0 -1290
  179. noshot/data/ML TS XAI/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +0 -1461
  180. noshot/data/ML TS XAI/NLP/NLP 3/Lab 11 NMT.ipynb +0 -2307
  181. noshot/data/ML TS XAI/NLP/NLP 3/NLP-LAB-4.ipynb +0 -216
  182. noshot/data/ML TS XAI/NLP/NLP 3/NLP-LAB-5.ipynb +0 -216
  183. noshot/data/ML TS XAI/NLP/NLP 3/abc.txt +0 -6
  184. noshot/data/ML TS XAI/NLP/NLP 3/ex-1-nltk.ipynb +0 -711
  185. noshot/data/ML TS XAI/NLP/NLP 3/ex-2-nlp.ipynb +0 -267
  186. noshot/data/ML TS XAI/NLP/NLP 3/exp8&9.ipynb +0 -305
  187. noshot/data/ML TS XAI/NLP/NLP 3/hind.ipynb +0 -287
  188. noshot/data/ML TS XAI/NLP/NLP 3/lab66.ipynb +0 -752
  189. noshot/data/ML TS XAI/NLP/NLP 3/leb_3.ipynb +0 -612
  190. noshot/data/ML TS XAI/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
  191. noshot/data/ML TS XAI/NLP/NLP 3/nlp_leb_1.ipynb +0 -3008
  192. noshot/data/ML TS XAI/NLP/NLP 3/nlp_leb_2.ipynb +0 -3095
  193. noshot/data/ML TS XAI/NLP/NLP 3/nlplab-9.ipynb +0 -295
  194. noshot/data/ML TS XAI/NLP/NLP 3/nltk-ex-4.ipynb +0 -506
  195. noshot/data/ML TS XAI/NLP/NLP 3/text1.txt +0 -48
  196. noshot/data/ML TS XAI/NLP/NLP 3/text2.txt +0 -8
  197. noshot/data/ML TS XAI/NLP/NLP 3/text3.txt +0 -48
  198. noshot/data/ML TS XAI/NLP/NLP 3/translation-rnn.ipynb +0 -812
  199. noshot/data/ML TS XAI/NLP/NLP 3/word2vector.ipynb +0 -173
  200. noshot/data/ML TS XAI/NLP/NLP 4/Backward Procedure Algorithm.ipynb +0 -179
  201. noshot/data/ML TS XAI/NLP/NLP 4/Chi Square Collocation.ipynb +0 -208
  202. noshot/data/ML TS XAI/NLP/NLP 4/Collocation (T test).ipynb +0 -188
  203. noshot/data/ML TS XAI/NLP/NLP 4/Experiment 1.ipynb +0 -437
  204. noshot/data/ML TS XAI/NLP/NLP 4/Forward Procedure Algorithm.ipynb +0 -132
  205. noshot/data/ML TS XAI/NLP/NLP 4/Hindle Rooth.ipynb +0 -414
  206. noshot/data/ML TS XAI/NLP/NLP 4/MachineTranslation.ipynb +0 -368
  207. noshot/data/ML TS XAI/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +0 -86
  208. noshot/data/ML TS XAI/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +0 -112
  209. noshot/data/ML TS XAI/NLP/NLP 4/PCFG Inside Probability.ipynb +0 -451
  210. noshot/data/ML TS XAI/NLP/NLP 4/Text Generation using LSTM.ipynb +0 -297
  211. noshot/data/ML TS XAI/NLP/NLP 4/Viterbi.ipynb +0 -310
  212. noshot/data/ML TS XAI/NLP/NLP 4/Word Sense Disambiguation.ipynb +0 -335
  213. noshot/data/ML TS XAI/NLP/NLP 5/10.Text Generation using LSTM.ipynb +0 -316
  214. noshot/data/ML TS XAI/NLP/NLP 5/11.Machine Translation.ipynb +0 -868
  215. noshot/data/ML TS XAI/NLP/NLP 5/2.T and Chi2 Test.ipynb +0 -204
  216. noshot/data/ML TS XAI/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +0 -234
  217. noshot/data/ML TS XAI/NLP/NLP 5/4.Hinddle and Rooth.ipynb +0 -128
  218. noshot/data/ML TS XAI/NLP/NLP 5/5.Forward and Backward.ipynb +0 -149
  219. noshot/data/ML TS XAI/NLP/NLP 5/6.Viterbi.ipynb +0 -111
  220. noshot/data/ML TS XAI/NLP/NLP 5/7.PCFG Parse Tree.ipynb +0 -134
  221. noshot/data/ML TS XAI/NLP/NLP 5/7.PCFG using cyk.ipynb +0 -101
  222. noshot/data/ML TS XAI/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +0 -310
  223. noshot/data/ML TS XAI/NLP/NLP 5/9.Word2Vector.ipynb +0 -78
  224. noshot/data/ML TS XAI/NLP/NLP 5/NLP ALL In One.ipynb +0 -2619
  225. noshot/data/ML TS XAI/NLP/NLP 5/sample1.txt +0 -15
  226. noshot/data/ML TS XAI/NLP/NLP 5/sample2.txt +0 -4
  227. noshot/data/ML TS XAI/NLP/NLP 5/word2vec_model.bin +0 -0
  228. noshot/data/ML TS XAI/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +0 -312
  229. noshot/data/ML TS XAI/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +0 -185
  230. noshot/data/ML TS XAI/NLP/NLP 6/3. Naive Bayes WSD.ipynb +0 -199
  231. noshot/data/ML TS XAI/NLP/NLP 6/4. Hinddle and Rooth.ipynb +0 -151
  232. noshot/data/ML TS XAI/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +0 -164
  233. noshot/data/ML TS XAI/NLP/NLP 6/7. PCFG using CYK.ipynb +0 -383
  234. noshot/data/ML TS XAI/NLP/NLP 6/8. BOW and TF-IDF.ipynb +0 -252
  235. noshot/data/ML TS XAI/Ubuntu CN Lab.iso +0 -0
  236. noshot-0.1.7.dist-info/RECORD +0 -216
  237. {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/LICENSE.txt +0 -0
  238. {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/WHEEL +0 -0
  239. {noshot-0.1.7.dist-info → noshot-0.1.9.dist-info}/top_level.txt +0 -0
@@ -1,1067 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "id": "465112ff-5cd0-4b7b-9722-da197d0593d7",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "Span 0-0 -> NP: Probability = 0.100000\n",
14
- "Span 1-1 -> V: Probability = 1.000000\n",
15
- "Span 2-2 -> NP: Probability = 0.180000\n",
16
- "Span 3-3 -> P: Probability = 1.000000\n",
17
- "Span 4-4 -> NP: Probability = 0.180000\n",
18
- "Span 1-2 -> VP: Probability = 0.126000\n",
19
- "Span 3-4 -> PP: Probability = 0.180000\n",
20
- "Span 0-2 -> S: Probability = 0.012600\n",
21
- "Span 1-4 -> VP: Probability = 0.006804\n",
22
- "Span 0-4 -> S: Probability = 0.000680\n"
23
- ]
24
- }
25
- ],
26
- "source": [
27
- "from collections import defaultdict\n",
28
- "\n",
29
- "# Grammar rules with probabilities\n",
30
- "pcfg = {\n",
31
- " ('S', 'NP', 'VP'): 1.0,\n",
32
- " ('VP', 'V', 'NP'): 0.7,\n",
33
- " ('VP', 'VP', 'PP'): 0.3,\n",
34
- " ('PP', 'P', 'NP'): 1.0,\n",
35
- " ('NP', 'astronomers'): 0.1,\n",
36
- " ('NP', 'ears'): 0.18,\n",
37
- " ('NP', 'stars'): 0.18,\n",
38
- " ('NP', 'telescopes'): 0.18,\n",
39
- " ('V', 'saw'): 1.0,\n",
40
- " ('P', 'with'): 1.0\n",
41
- "}\n",
42
- "\n",
43
- "# The sentence we want to parse\n",
44
- "sentence = \"astronomers saw stars with ears\".split()\n",
45
- "\n",
46
- "# Function to perform CYK algorithm\n",
47
- "def cyk_algorithm(pcfg, sentence):\n",
48
- " n = len(sentence)\n",
49
- " # Create a table to store probabilities\n",
50
- " table = defaultdict(float)\n",
51
- " \n",
52
- " # Initialize for the single words (length 1 spans)\n",
53
- " for i, word in enumerate(sentence):\n",
54
- " for rule in pcfg:\n",
55
- " if len(rule) == 2 and rule[1] == word:\n",
56
- " table[(i, i, rule[0])] = pcfg[rule]\n",
57
- " \n",
58
- " # Filling the table for larger spans (length > 1)\n",
59
- " for span in range(2, n+1): # span length\n",
60
- " for i in range(n - span + 1): # starting point of the span\n",
61
- " j = i + span - 1 # ending point of the span\n",
62
- " for k in range(i, j): # split point\n",
63
- " for rule in pcfg:\n",
64
- " if len(rule) == 3: # binary rules\n",
65
- " A, B, C = rule\n",
66
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
67
- " prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
68
- " if prob > table[(i, j, A)]:\n",
69
- " table[(i, j, A)] = prob\n",
70
- "\n",
71
- " # Print the table with inside probabilities\n",
72
- " return table\n",
73
- "\n",
74
- "# Run the CYK algorithm\n",
75
- "table = cyk_algorithm(pcfg, sentence)\n",
76
- "\n",
77
- "# Print the resulting probabilities\n",
78
- "for key, prob in table.items():\n",
79
- " print(f\"Span {key[0]}-{key[1]} -> {key[2]}: Probability = {prob:.6f}\")\n",
80
- "\n"
81
- ]
82
- },
83
- {
84
- "cell_type": "code",
85
- "execution_count": 2,
86
- "id": "693d06f1-9161-44e5-9c9d-98c9da850b67",
87
- "metadata": {},
88
- "outputs": [
89
- {
90
- "name": "stdout",
91
- "output_type": "stream",
92
- "text": [
93
- "Inside probability of the sequence the cat eats: 0.018000000000000002\n"
94
- ]
95
- }
96
- ],
97
- "source": [
98
- "import numpy as np\n",
99
- "from collections import defaultdict\n",
100
- "\n",
101
- "class PCFG:\n",
102
- " def __init__(self):\n",
103
- " # Non-terminal production rules and their probabilities\n",
104
- " self.productions = defaultdict(list)\n",
105
- " self.terminals = defaultdict(list)\n",
106
- "\n",
107
- " def add_production(self, lhs, rhs, prob):\n",
108
- " \"\"\" Adds a production rule with its probability \"\"\"\n",
109
- " if len(rhs) == 1 and rhs[0].islower(): # Terminal rule\n",
110
- " self.terminals[rhs[0]].append((lhs, prob))\n",
111
- " else: # Non-terminal rule\n",
112
- " self.productions[lhs].append((rhs, prob))\n",
113
- "\n",
114
- "def cyk_pcfg(pcfg, words):\n",
115
- " \"\"\" Applies the CYK algorithm to find the inside probability of a word sequence \"\"\"\n",
116
- " n = len(words)\n",
117
- " non_terminals = list(pcfg.productions.keys())\n",
118
- " \n",
119
- " # Initialize a 3D table for inside probabilities\n",
120
- " P = defaultdict(lambda: np.zeros((n, n)))\n",
121
- " \n",
122
- " # Fill the diagonal with terminal production probabilities\n",
123
- " for i, word in enumerate(words):\n",
124
- " if word in pcfg.terminals:\n",
125
- " for lhs, prob in pcfg.terminals[word]:\n",
126
- " P[lhs][i, i] = prob\n",
127
- "\n",
128
- " # Fill the table for subsequences\n",
129
- " for span in range(2, n + 1): # span length from 2 to n\n",
130
- " for i in range(n - span + 1):\n",
131
- " j = i + span - 1\n",
132
- " for k in range(i, j): # midpoint\n",
133
- " for lhs in non_terminals:\n",
134
- " for rhs, prob in pcfg.productions[lhs]:\n",
135
- " if len(rhs) == 2:\n",
136
- " left, right = rhs\n",
137
- " P[lhs][i, j] += prob * P[left][i, k] * P[right][k + 1, j]\n",
138
- "\n",
139
- " # The inside probability for the start symbol S to derive the entire sequence\n",
140
- " return P['S'][0, n - 1]\n",
141
- "\n",
142
- "# Example Usage:\n",
143
- "\n",
144
- "# Define a PCFG\n",
145
- "pcfg = PCFG()\n",
146
- "pcfg.add_production('S', ['NP', 'VP'], 0.9)\n",
147
- "pcfg.add_production('S', ['VP'], 0.1)\n",
148
- "pcfg.add_production('NP', ['Det', 'N'], 0.5)\n",
149
- "pcfg.add_production('VP', ['V', 'NP'], 0.5)\n",
150
- "pcfg.add_production('VP', ['eats'], 0.1)\n",
151
- "pcfg.add_production('Det', ['the'], 0.8)\n",
152
- "pcfg.add_production('N', ['cat'], 0.5)\n",
153
- "pcfg.add_production('N', ['food'], 0.5)\n",
154
- "pcfg.add_production('V', ['eats'], 1.0)\n",
155
- "\n",
156
- "# Example word sequence\n",
157
- "words = ['the', 'cat', 'eats']\n",
158
- "\n",
159
- "# Calculate inside probability using CYK algorithm\n",
160
- "inside_prob = cyk_pcfg(pcfg, words)\n",
161
- "\n",
162
- "print(f\"Inside probability of the sequence {' '.join(words)}: {inside_prob}\")\n",
163
- "\n"
164
- ]
165
- },
166
- {
167
- "cell_type": "code",
168
- "execution_count": 3,
169
- "id": "92685c0a-36e5-4092-b415-17a39190bf31",
170
- "metadata": {},
171
- "outputs": [
172
- {
173
- "name": "stdout",
174
- "output_type": "stream",
175
- "text": [
176
- "Final Probability of the sentence: 0.000680\n"
177
- ]
178
- }
179
- ],
180
- "source": [
181
- "from collections import defaultdict\n",
182
- "\n",
183
- "# Grammar rules with probabilities\n",
184
- "pcfg = {\n",
185
- " ('S', 'NP', 'VP'): 1.0,\n",
186
- " ('VP', 'V', 'NP'): 0.7,\n",
187
- " ('VP', 'VP', 'PP'): 0.3,\n",
188
- " ('PP', 'P', 'NP'): 1.0,\n",
189
- " ('NP', 'astronomers'): 0.1,\n",
190
- " ('NP', 'ears'): 0.18,\n",
191
- " ('NP', 'stars'): 0.18,\n",
192
- " ('NP', 'telescopes'): 0.18,\n",
193
- " ('V', 'saw'): 1.0,\n",
194
- " ('P', 'with'): 1.0\n",
195
- "}\n",
196
- "\n",
197
- "# The sentence we want to parse\n",
198
- "sentence = \"astronomers saw stars with ears\".split()\n",
199
- "\n",
200
- "# Function to perform CYK algorithm\n",
201
- "def cyk_algorithm(pcfg, sentence):\n",
202
- " n = len(sentence)\n",
203
- " # Create a table to store probabilities\n",
204
- " table = defaultdict(float)\n",
205
- " \n",
206
- " # Initialize for the single words (length 1 spans)\n",
207
- " for i, word in enumerate(sentence):\n",
208
- " for rule in pcfg:\n",
209
- " if len(rule) == 2 and rule[1] == word:\n",
210
- " table[(i, i, rule[0])] = pcfg[rule]\n",
211
- " \n",
212
- " # Filling the table for larger spans (length > 1)\n",
213
- " for span in range(2, n+1): # span length\n",
214
- " for i in range(n - span + 1): # starting point of the span\n",
215
- " j = i + span - 1 # ending point of the span\n",
216
- " for k in range(i, j): # split point\n",
217
- " for rule in pcfg:\n",
218
- " if len(rule) == 3: # binary rules\n",
219
- " A, B, C = rule\n",
220
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
221
- " prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
222
- " if prob > table[(i, j, A)]:\n",
223
- " table[(i, j, A)] = prob\n",
224
- "\n",
225
- " # Return the final result for the whole sentence\n",
226
- " return table[(0, n-1, 'S')] # The probability of the sentence being an S (sentence)\n",
227
- "\n",
228
- "# Run the CYK algorithm\n",
229
- "final_prob = cyk_algorithm(pcfg, sentence)\n",
230
- "\n",
231
- "# Print the final probability of the sentence\n",
232
- "print(f\"Final Probability of the sentence: {final_prob:.6f}\")\n"
233
- ]
234
- },
235
- {
236
- "cell_type": "code",
237
- "execution_count": 9,
238
- "id": "12d3ba54-fec7-492b-b967-371f607b5f1d",
239
- "metadata": {},
240
- "outputs": [
241
- {
242
- "name": "stdout",
243
- "output_type": "stream",
244
- "text": [
245
- "Final Probability of the sentence: 0.000680\n"
246
- ]
247
- }
248
- ],
249
- "source": [
250
- "from collections import defaultdict\n",
251
- "\n",
252
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
253
- "pcfg = {\n",
254
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
255
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
256
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
257
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
258
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
259
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
260
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
261
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
262
- " ('V', 'saw'): 1.0, # V -> saw\n",
263
- " ('P', 'with'): 1.0 # P -> with\n",
264
- "}\n",
265
- "\n",
266
- "# The sentence we want to parse\n",
267
- "sentence = \"astronomers saw stars with ears\".split()\n",
268
- "\n",
269
- "# Function to perform the CYK algorithm and calculate inside probabilities\n",
270
- "def cyk_algorithm(pcfg, sentence):\n",
271
- " n = len(sentence) # Length of the sentence (number of words)\n",
272
- " \n",
273
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> probability\n",
274
- " table = defaultdict(float)\n",
275
- " \n",
276
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
277
- " for i, word in enumerate(sentence):\n",
278
- " for rule in pcfg:\n",
279
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
280
- " table[(i, i, rule[0])] = pcfg[rule]\n",
281
- " \n",
282
- " # Step 2: Fill the table for larger spans (length > 1)\n",
283
- " for span in range(2, n + 1): # span length\n",
284
- " for i in range(n - span + 1): # start index of the span\n",
285
- " j = i + span - 1 # end index of the span\n",
286
- " for k in range(i, j): # split point\n",
287
- " for rule in pcfg:\n",
288
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
289
- " A, B, C = rule # A -> B C\n",
290
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
291
- " prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
292
- " if prob > table[(i, j, A)]:\n",
293
- " table[(i, j, A)] = prob\n",
294
- "\n",
295
- " # Step 3: Return the final result for the whole sentence as an 'S' (complete sentence)\n",
296
- " # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
297
- " return table[(0, n-1, 'S')] # Probability of the whole sentence being an S (sentence)\n",
298
- "\n",
299
- "# Run the CYK algorithm and get the final probability\n",
300
- "final_prob = cyk_algorithm(pcfg, sentence)\n",
301
- "\n",
302
- "# Print the final probability of the sentence\n",
303
- "if final_prob > 0:\n",
304
- " print(f\"Final Probability of the sentence: {final_prob:.6f}\")\n",
305
- "else:\n",
306
- " print(\"The sentence could not be parsed with the given grammar.\")"
307
- ]
308
- },
309
- {
310
- "cell_type": "code",
311
- "execution_count": 11,
312
- "id": "acadf0b1-acd3-420e-9ea8-04ec2046b694",
313
- "metadata": {},
314
- "outputs": [
315
- {
316
- "name": "stdout",
317
- "output_type": "stream",
318
- "text": [
319
- "Parse t1: Probability = 0.000680, Derivation = ('NP', 'astronomers', 'VP', ('VP', ('V', 'saw', 'NP', 'stars'), 'PP', ('P', 'with', 'NP', 'ears')))\n"
320
- ]
321
- }
322
- ],
323
- "source": [
324
- "from collections import defaultdict\n",
325
- "\n",
326
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
327
- "pcfg = {\n",
328
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
329
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
330
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
331
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
332
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
333
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
334
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
335
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
336
- " ('V', 'saw'): 1.0, # V -> saw\n",
337
- " ('P', 'with'): 1.0 # P -> with\n",
338
- "}\n",
339
- "\n",
340
- "# The sentence we want to parse\n",
341
- "sentence = \"astronomers saw stars with ears\".split()\n",
342
- "\n",
343
- "# Function to run the CYK algorithm and collect every parse of the sentence with its probability\n",
344
- "def cyk_algorithm(pcfg, sentence):\n",
345
- " n = len(sentence) # Length of the sentence (number of words)\n",
346
- " \n",
347
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
348
- " table = defaultdict(list)\n",
349
- " \n",
350
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
351
- " for i, word in enumerate(sentence):\n",
352
- " for rule in pcfg:\n",
353
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
354
- " table[(i, i, rule[0])].append((pcfg[rule], word))\n",
355
- " \n",
356
- " # Step 2: Fill the table for larger spans (length > 1)\n",
357
- " for span in range(2, n + 1): # span length\n",
358
- " for i in range(n - span + 1): # start index of the span\n",
359
- " j = i + span - 1 # end index of the span\n",
360
- " for k in range(i, j): # split point\n",
361
- " for rule in pcfg:\n",
362
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
363
- " A, B, C = rule # A -> B C\n",
364
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
365
- " for prob1, derivation1 in table[(i, k, B)]:\n",
366
- " for prob2, derivation2 in table[(k + 1, j, C)]:\n",
367
- " prob = prob1 * prob2 * pcfg[rule]\n",
368
- " table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
369
- "\n",
370
- " # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
371
- " # Every complete parse of the sentence as an S is collected in table[(0, n-1, 'S')]\n",
372
- " return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
373
- "\n",
374
- "# Run the CYK algorithm and get all possible parses\n",
375
- "parses = cyk_algorithm(pcfg, sentence)\n",
376
- "\n",
377
- "# Print the final probabilities and derivations of the sentence\n",
378
- "if parses:\n",
379
- " for idx, (prob, derivation) in enumerate(parses, start=1):\n",
380
- " print(f\"Parse t{idx}: Probability = {prob:.6f}, Derivation = {derivation}\")\n",
381
- "else:\n",
382
- " print(\"The sentence could not be parsed with the given grammar.\")\n"
383
- ]
384
- },
385
- {
386
- "cell_type": "code",
387
- "execution_count": 14,
388
- "id": "990ba565-bf59-49b3-aef9-355a6785e6dc",
389
- "metadata": {},
390
- "outputs": [
391
- {
392
- "name": "stdout",
393
- "output_type": "stream",
394
- "text": [
395
- "Parse t1: Probability = 0.000680, Derivation = ('NP', 'astronomers', 'VP', ('VP', ('V', 'saw', 'NP', 'stars'), 'PP', ('P', 'with', 'NP', 'ears')))\n"
396
- ]
397
- }
398
- ],
399
- "source": [
400
- "from collections import defaultdict\n",
401
- "\n",
402
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
403
- "pcfg = {\n",
404
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
405
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
406
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
407
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
408
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
409
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
410
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
411
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
412
- " ('V', 'saw'): 1.0, # V -> saw\n",
413
- " ('P', 'with'): 1.0 # P -> with\n",
414
- "}\n",
415
- "\n",
416
- "# The sentence we want to parse\n",
417
- "sentence = \"astronomers saw stars with ears\".split()\n",
418
- "\n",
419
- "# Function to run the CYK algorithm and collect every parse of the sentence with its probability\n",
420
- "def cyk_algorithm(pcfg, sentence):\n",
421
- " n = len(sentence) # Length of the sentence (number of words)\n",
422
- " \n",
423
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
424
- " table = defaultdict(list)\n",
425
- " \n",
426
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
427
- " for i, word in enumerate(sentence):\n",
428
- " for rule in pcfg:\n",
429
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
430
- " table[(i, i, rule[0])].append((pcfg[rule], word))\n",
431
- " \n",
432
- " # Step 2: Fill the table for larger spans (length > 1)\n",
433
- " for span in range(2, n + 1): # span length\n",
434
- " for i in range(n - span + 1): # start index of the span\n",
435
- " j = i + span - 1 # end index of the span\n",
436
- " for k in range(i, j): # split point\n",
437
- " for rule in pcfg:\n",
438
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
439
- " A, B, C = rule # A -> B C\n",
440
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
441
- " for prob1, derivation1 in table[(i, k, B)]:\n",
442
- " for prob2, derivation2 in table[(k + 1, j, C)]:\n",
443
- " prob = prob1 * prob2 * pcfg[rule]\n",
444
- " table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
445
- "\n",
446
- " # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
447
- " # Every complete parse of the sentence as an S is collected in table[(0, n-1, 'S')]\n",
448
- " return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
449
- "\n",
450
- "# Run the CYK algorithm and get all possible parses\n",
451
- "parses = cyk_algorithm(pcfg, sentence)\n",
452
- "\n",
453
- "# Print the final probabilities and derivations of the sentence\n",
454
- "if parses:\n",
455
- " for idx, (prob, derivation) in enumerate(parses, start=1):\n",
456
- " print(f\"Parse t{idx}: Probability = {prob:.6f}, Derivation = {derivation}\")\n",
457
- "else:\n",
458
- " print(\"The sentence could not be parsed with the given grammar.\")\n"
459
- ]
460
- },
461
- {
462
- "cell_type": "code",
463
- "execution_count": 19,
464
- "id": "03b70885-e274-4f15-b617-9ce8cbea6ff9",
465
- "metadata": {},
466
- "outputs": [
467
- {
468
- "name": "stdout",
469
- "output_type": "stream",
470
- "text": [
471
- "Parse t1: Probability = 0.000680, Derivation = ('NP', 'astronomers', 'VP', ('VP', ('V', 'saw', 'NP', 'stars'), 'PP', ('P', 'with', 'NP', 'ears')))\n"
472
- ]
473
- }
474
- ],
475
- "source": [
476
- "from collections import defaultdict\n",
477
- "\n",
478
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
479
- "pcfg = {\n",
480
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
481
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
482
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
483
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
484
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
485
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
486
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
487
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
488
- " ('V', 'saw'): 1.0, # V -> saw\n",
489
- " ('P', 'with'): 1.0 # P -> with\n",
490
- "}\n",
491
- "\n",
492
- "# The sentence we want to parse\n",
493
- "sentence = \"astronomers saw stars with ears\".split()\n",
494
- "\n",
495
- "# Function to run the CYK algorithm and collect every parse of the sentence with its probability\n",
496
- "def cyk_algorithm(pcfg, sentence):\n",
497
- " n = len(sentence) # Length of the sentence (number of words)\n",
498
- " \n",
499
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
500
- " table = defaultdict(list)\n",
501
- " \n",
502
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
503
- " for i, word in enumerate(sentence):\n",
504
- " for rule in pcfg:\n",
505
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
506
- " table[(i, i, rule[0])].append((pcfg[rule], word))\n",
507
- " \n",
508
- " # Step 2: Fill the table for larger spans (length > 1)\n",
509
- " for span in range(2, n + 1): # span length\n",
510
- " for i in range(n - span + 1): # start index of the span\n",
511
- " j = i + span - 1 # end index of the span\n",
512
- " for k in range(i, j): # split point\n",
513
- " for rule in pcfg:\n",
514
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
515
- " A, B, C = rule # A -> B C\n",
516
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
517
- " for prob1, derivation1 in table[(i, k, B)]:\n",
518
- " for prob2, derivation2 in table[(k + 1, j, C)]:\n",
519
- " prob = prob1 * prob2 * pcfg[rule]\n",
520
- " table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
521
- "\n",
522
- " # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
523
- " # Every complete parse of the sentence as an S is collected in table[(0, n-1, 'S')]\n",
524
- " return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
525
- "\n",
526
- "# Run the CYK algorithm and get all possible parses\n",
527
- "parses = cyk_algorithm(pcfg, sentence)\n",
528
- "\n",
529
- "# Print the final probabilities and derivations of the sentence\n",
530
- "if parses:\n",
531
- " for idx, (prob, derivation) in enumerate(parses, start=1):\n",
532
- " print(f\"Parse t{idx}: Probability = {prob:.6f}, Derivation = {derivation}\")\n",
533
- " #print(parses)\n",
534
- "else:\n",
535
- " print(\"The sentence could not be parsed with the given grammar.\")\n"
536
- ]
537
- },
538
- {
539
- "cell_type": "code",
540
- "execution_count": 1,
541
- "id": "a36738d7-23a5-4a27-a58e-b0df0ee7132b",
542
- "metadata": {},
543
- "outputs": [
544
- {
545
- "name": "stdout",
546
- "output_type": "stream",
547
- "text": [
548
- "Parse t1: Probability = 0.000680\n",
549
- "(NP\n",
550
- " astronomers\n",
551
- " (VP\n",
552
- " (V\n",
553
- " saw\n",
554
- " stars\n",
555
- " )\n",
556
- " (P\n",
557
- " with\n",
558
- " ears\n",
559
- " )\n",
560
- " )\n",
561
- ")\n",
562
- "\n"
563
- ]
564
- }
565
- ],
566
- "source": [
567
- "from collections import defaultdict\n",
568
- "\n",
569
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
570
- "pcfg = {\n",
571
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
572
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
573
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
574
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
575
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
576
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
577
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
578
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
579
- " ('V', 'saw'): 1.0, # V -> saw\n",
580
- " ('P', 'with'): 1.0 # P -> with\n",
581
- "}\n",
582
- "\n",
583
- "# The sentence we want to parse\n",
584
- "sentence = \"astronomers saw stars with ears\".split()\n",
585
- "\n",
586
- "# Function to run the CYK algorithm and collect every parse of the sentence with its probability\n",
587
- "def cyk_algorithm(pcfg, sentence):\n",
588
- " n = len(sentence) # Length of the sentence (number of words)\n",
589
- " \n",
590
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
591
- " table = defaultdict(list)\n",
592
- " \n",
593
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
594
- " for i, word in enumerate(sentence):\n",
595
- " for rule in pcfg:\n",
596
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
597
- " table[(i, i, rule[0])].append((pcfg[rule], word))\n",
598
- " \n",
599
- " # Step 2: Fill the table for larger spans (length > 1)\n",
600
- " for span in range(2, n + 1): # span length\n",
601
- " for i in range(n - span + 1): # start index of the span\n",
602
- " j = i + span - 1 # end index of the span\n",
603
- " for k in range(i, j): # split point\n",
604
- " for rule in pcfg:\n",
605
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
606
- " A, B, C = rule # A -> B C\n",
607
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
608
- " for prob1, derivation1 in table[(i, k, B)]:\n",
609
- " for prob2, derivation2 in table[(k + 1, j, C)]:\n",
610
- " prob = prob1 * prob2 * pcfg[rule]\n",
611
- " table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
612
- "\n",
613
- " # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
614
- " # Every complete parse of the sentence as an S is collected in table[(0, n-1, 'S')]\n",
615
- " return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
616
- "\n",
617
- "# Helper function to print the parse tree in a readable format\n",
618
- "def print_parse_tree(derivation, indent=0):\n",
619
- " if isinstance(derivation, tuple):\n",
620
- " A, derivation1, B, derivation2 = derivation\n",
621
- " print(' ' * indent + f\"({A}\")\n",
622
- " print_parse_tree(derivation1, indent + 2)\n",
623
- " print_parse_tree(derivation2, indent + 2)\n",
624
- " print(' ' * indent + f\")\")\n",
625
- " else:\n",
626
- " print(' ' * indent + derivation)\n",
627
- "\n",
628
- "# Run the CYK algorithm and get all possible parses\n",
629
- "parses = cyk_algorithm(pcfg, sentence)\n",
630
- "\n",
631
- "# Print the final probabilities and derivations of the sentence\n",
632
- "if parses:\n",
633
- " for idx, (prob, derivation) in enumerate(parses, start=1):\n",
634
- " print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
635
- " print_parse_tree(derivation)\n",
636
- " print() # Print a blank line between parses\n",
637
- "else:\n",
638
- " print(\"The sentence could not be parsed with the given grammar.\")\n"
639
- ]
640
- },
641
- {
642
- "cell_type": "code",
643
- "execution_count": 2,
644
- "id": "ab75fa9e-9ab1-43f5-9045-e1d7354b4aaa",
645
- "metadata": {},
646
- "outputs": [
647
- {
648
- "name": "stdout",
649
- "output_type": "stream",
650
- "text": [
651
- "Parse t1: Probability = 0.000680\n",
652
- "(NP\n",
653
- " astronomers\n",
654
- " (VP\n",
655
- " (V\n",
656
- " saw\n",
657
- " stars\n",
658
- " )\n",
659
- " (P\n",
660
- " with\n",
661
- " ears\n",
662
- " )\n",
663
- " )\n",
664
- ")\n",
665
- "\n"
666
- ]
667
- }
668
- ],
669
- "source": [
670
- "from collections import defaultdict\n",
671
- "\n",
672
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
673
- "pcfg = {\n",
674
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
675
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
676
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
677
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
678
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
679
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
680
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
681
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
682
- " ('V', 'saw'): 1.0, # V -> saw\n",
683
- " ('P', 'with'): 1.0 # P -> with\n",
684
- "}\n",
685
- "\n",
686
- "# The sentence we want to parse\n",
687
- "sentence = \"astronomers saw stars with ears\".split()\n",
688
- "\n",
689
- "# Function to run the CYK algorithm and collect every parse of the sentence with its probability\n",
690
- "def cyk_algorithm(pcfg, sentence):\n",
691
- " n = len(sentence) # Length of the sentence (number of words)\n",
692
- " \n",
693
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
694
- " table = defaultdict(list)\n",
695
- " \n",
696
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
697
- " for i, word in enumerate(sentence):\n",
698
- " for rule in pcfg:\n",
699
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
700
- " table[(i, i, rule[0])].append((pcfg[rule], word))\n",
701
- " \n",
702
- " # Step 2: Fill the table for larger spans (length > 1)\n",
703
- " for span in range(2, n + 1): # span length\n",
704
- " for i in range(n - span + 1): # start index of the span\n",
705
- " j = i + span - 1 # end index of the span\n",
706
- " for k in range(i, j): # split point\n",
707
- " for rule in pcfg:\n",
708
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
709
- " A, B, C = rule # A -> B C\n",
710
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
711
- " for prob1, derivation1 in table[(i, k, B)]:\n",
712
- " for prob2, derivation2 in table[(k + 1, j, C)]:\n",
713
- " prob = prob1 * prob2 * pcfg[rule]\n",
714
- " table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
715
- "\n",
716
- " # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
717
- " # Every complete parse of the sentence as an S is collected in table[(0, n-1, 'S')]\n",
718
- " return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
719
- "\n",
720
- "# Helper function to print the parse tree in a readable format\n",
721
- "def print_parse_tree(derivation, indent=0):\n",
722
- " if isinstance(derivation, tuple):\n",
723
- " A, derivation1, B, derivation2 = derivation\n",
724
- " print(' ' * indent + f\"({A}\")\n",
725
- " print_parse_tree(derivation1, indent + 2)\n",
726
- " print_parse_tree(derivation2, indent + 2)\n",
727
- " print(' ' * indent + f\")\")\n",
728
- " else:\n",
729
- " print(' ' * indent + derivation)\n",
730
- "\n",
731
- "# Run the CYK algorithm and get all possible parses\n",
732
- "parses = cyk_algorithm(pcfg, sentence)\n",
733
- "\n",
734
- "# Sort parses by probability in descending order\n",
735
- "parses.sort(key=lambda x: x[0], reverse=True)\n",
736
- "\n",
737
- "# Print the final probabilities and derivations of the sentence\n",
738
- "if parses:\n",
739
- " for idx, (prob, derivation) in enumerate(parses, start=1):\n",
740
- " print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
741
- " print_parse_tree(derivation)\n",
742
- " print() # Print a blank line between parses\n",
743
- "else:\n",
744
- " print(\"The sentence could not be parsed with the given grammar.\")\n"
745
- ]
746
- },
747
- {
748
- "cell_type": "code",
749
- "execution_count": 3,
750
- "id": "b6de0644-7729-499b-a5f5-e687b20f3e57",
751
- "metadata": {},
752
- "outputs": [
753
- {
754
- "name": "stdout",
755
- "output_type": "stream",
756
- "text": [
757
- "Parse t1: Probability = 0.000680\n",
758
- "(NP astronomers (VP (V saw stars) (P with ears)))\n",
759
- "\n"
760
- ]
761
- }
762
- ],
763
- "source": [
764
- "from collections import defaultdict\n",
765
- "\n",
766
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
767
- "pcfg = {\n",
768
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
769
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
770
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
771
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
772
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
773
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
774
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
775
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
776
- " ('V', 'saw'): 1.0, # V -> saw\n",
777
- " ('P', 'with'): 1.0 # P -> with\n",
778
- "}\n",
779
- "\n",
780
- "# The sentence we want to parse\n",
781
- "sentence = \"astronomers saw stars with ears\".split()\n",
782
- "\n",
783
- "# Function to run the CYK algorithm and collect every parse of the sentence with its probability\n",
784
- "def cyk_algorithm(pcfg, sentence):\n",
785
- " n = len(sentence) # Length of the sentence (number of words)\n",
786
- " \n",
787
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
788
- " table = defaultdict(list)\n",
789
- " \n",
790
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
791
- " for i, word in enumerate(sentence):\n",
792
- " for rule in pcfg:\n",
793
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
794
- " table[(i, i, rule[0])].append((pcfg[rule], rule[1]))\n",
795
- " \n",
796
- " # Step 2: Fill the table for larger spans (length > 1)\n",
797
- " for span in range(2, n + 1): # span length\n",
798
- " for i in range(n - span + 1): # start index of the span\n",
799
- " j = i + span - 1 # end index of the span\n",
800
- " for k in range(i, j): # split point\n",
801
- " for rule in pcfg:\n",
802
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
803
- " A, B, C = rule # A -> B C\n",
804
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
805
- " for prob1, derivation1 in table[(i, k, B)]:\n",
806
- " for prob2, derivation2 in table[(k + 1, j, C)]:\n",
807
- " prob = prob1 * prob2 * pcfg[rule]\n",
808
- " table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
809
- "\n",
810
- " # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
811
- " # Every complete parse of the sentence as an S is collected in table[(0, n-1, 'S')]\n",
812
- " return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
813
- "\n",
814
- "# Helper function to convert the parse tree into a string\n",
815
- "def build_parse_tree(derivation):\n",
816
- " if isinstance(derivation, tuple):\n",
817
- " A, derivation1, B, derivation2 = derivation\n",
818
- " return f\"({A} {build_parse_tree(derivation1)} {build_parse_tree(derivation2)})\"\n",
819
- " else:\n",
820
- " return derivation\n",
821
- "\n",
822
- "# Run the CYK algorithm and get all possible parses\n",
823
- "parses = cyk_algorithm(pcfg, sentence)\n",
824
- "\n",
825
- "# Function to sort parses by probability (optional, for better readability)\n",
826
- "def sort_parses(parses):\n",
827
- " return sorted(parses, key=lambda x: x[0], reverse=True)\n",
828
- "\n",
829
- "# Sort the parses (optional)\n",
830
- "sorted_parses = sort_parses(parses)\n",
831
- "\n",
832
- "# Print the final probabilities and derivations of the sentence\n",
833
- "if sorted_parses:\n",
834
- " for idx, (prob, derivation) in enumerate(sorted_parses, start=1):\n",
835
- " tree_str = build_parse_tree(derivation)\n",
836
- " print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
837
- " print(tree_str)\n",
838
- " print() # Print a blank line between parses\n",
839
- "else:\n",
840
- " print(\"The sentence could not be parsed with the given grammar.\")\n"
841
- ]
842
- },
843
- {
844
- "cell_type": "code",
845
- "execution_count": 4,
846
- "id": "b8b18f54-377d-4288-8511-a283d619c590",
847
- "metadata": {},
848
- "outputs": [
849
- {
850
- "name": "stdout",
851
- "output_type": "stream",
852
- "text": [
853
- "Parse t1: Probability = 0.000680\n",
854
- "(NP\n",
855
- " astronomers\n",
856
- " (VP\n",
857
- " (V\n",
858
- " saw\n",
859
- " stars\n",
860
- " )\n",
861
- " (P\n",
862
- " with\n",
863
- " ears\n",
864
- " )\n",
865
- " )\n",
866
- ")\n",
867
- "\n"
868
- ]
869
- }
870
- ],
871
- "source": [
872
- "from collections import defaultdict\n",
873
- "\n",
874
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
875
- "pcfg = {\n",
876
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
877
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
878
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
879
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
880
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
881
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
882
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
883
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
884
- " ('V', 'saw'): 1.0, # V -> saw\n",
885
- " ('P', 'with'): 1.0 # P -> with\n",
886
- "}\n",
887
- "\n",
888
- "# The sentence we want to parse\n",
889
- "sentence = \"astronomers saw stars with ears\".split()\n",
890
- "\n",
891
- "# Function to run the CYK algorithm and collect every parse of the sentence with its probability\n",
892
- "def cyk_algorithm(pcfg, sentence):\n",
893
- " n = len(sentence) # Length of the sentence (number of words)\n",
894
- " \n",
895
- " # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
896
- " table = defaultdict(list)\n",
897
- " \n",
898
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
899
- " for i, word in enumerate(sentence):\n",
900
- " for rule in pcfg:\n",
901
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
902
- " table[(i, i, rule[0])].append((pcfg[rule], word))\n",
903
- " \n",
904
- " # Step 2: Fill the table for larger spans (length > 1)\n",
905
- " for span in range(2, n + 1): # span length\n",
906
- " for i in range(n - span + 1): # start index of the span\n",
907
- " j = i + span - 1 # end index of the span\n",
908
- " for k in range(i, j): # split point\n",
909
- " for rule in pcfg:\n",
910
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
911
- " A, B, C = rule # A -> B C\n",
912
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
913
- " for prob1, derivation1 in table[(i, k, B)]:\n",
914
- " for prob2, derivation2 in table[(k + 1, j, C)]:\n",
915
- " prob = prob1 * prob2 * pcfg[rule]\n",
916
- " table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
917
- "\n",
918
- " # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
919
- " # Every complete parse of the sentence as an S is collected in table[(0, n-1, 'S')]\n",
920
- " return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
921
- "\n",
922
- "# Helper function to print the parse tree in a readable format\n",
923
- "def print_parse_tree(derivation, indent=0):\n",
924
- " if isinstance(derivation, tuple):\n",
925
- " A, derivation1, B, derivation2 = derivation\n",
926
- " print(' ' * indent + f\"({A}\")\n",
927
- " print_parse_tree(derivation1, indent + 2)\n",
928
- " print_parse_tree(derivation2, indent + 2)\n",
929
- " print(' ' * indent + f\")\")\n",
930
- " else:\n",
931
- " print(' ' * indent + derivation)\n",
932
- "\n",
933
- "# Run the CYK algorithm and get all possible parses\n",
934
- "parses = cyk_algorithm(pcfg, sentence)\n",
935
- "\n",
936
- "# Sort parses by probability in descending order\n",
937
- "parses.sort(key=lambda x: x[0], reverse=True)\n",
938
- "\n",
939
- "# Print the final probabilities and derivations of the sentence\n",
940
- "if parses:\n",
941
- " for idx, (prob, derivation) in enumerate(parses, start=1):\n",
942
- " print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
943
- " print_parse_tree(derivation)\n",
944
- " print() # Print a blank line between parses\n",
945
- "else:\n",
946
- " print(\"The sentence could not be parsed with the given grammar.\")"
947
- ]
948
- },
949
- {
950
- "cell_type": "code",
951
- "execution_count": 2,
952
- "id": "66fec238-991f-4ded-906e-0f558e8630ea",
953
- "metadata": {},
954
- "outputs": [
955
- {
956
- "name": "stdout",
957
- "output_type": "stream",
958
- "text": [
959
- "Final Probability of the sentence: 0.000680\n",
960
- "Parse Tree: (S NP (VP (VP V NP) (PP P NP)))\n"
961
- ]
962
- }
963
- ],
964
- "source": [
965
- "from collections import defaultdict\n",
966
- "\n",
967
- "# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
968
- "pcfg = {\n",
969
- " ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
970
- " ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
971
- " ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
972
- " ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
973
- " ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
974
- " ('NP', 'ears'): 0.18, # NP -> ears\n",
975
- " ('NP', 'stars'): 0.18, # NP -> stars\n",
976
- " ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
977
- " ('V', 'saw'): 1.0, # V -> saw\n",
978
- " ('P', 'with'): 1.0 # P -> with\n",
979
- "}\n",
980
- "\n",
981
- "# The sentence we want to parse\n",
982
- "sentence = \"astronomers saw stars with ears\".split()\n",
983
- "\n",
984
- "# Function to run the CYK algorithm and compute the best-parse (Viterbi) probability with backpointers\n",
985
- "def cyk_algorithm(pcfg, sentence):\n",
986
- " n = len(sentence) # Length of the sentence (number of words)\n",
987
- " \n",
988
- " # Table to store probabilities\n",
989
- " table = defaultdict(float)\n",
990
- " backpointer = defaultdict(lambda: None)\n",
991
- "\n",
992
- " # Step 1: Initialize the table for single words (length 1 spans)\n",
993
- " for i, word in enumerate(sentence):\n",
994
- " for rule in pcfg:\n",
995
- " if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
996
- " table[(i, i, rule[0])] = pcfg[rule]\n",
997
- "\n",
998
- " # Step 2: Fill the table for larger spans (length > 1)\n",
999
- " for span in range(2, n + 1): # span length\n",
1000
- " for i in range(n - span + 1): # start index of the span\n",
1001
- " j = i + span - 1 # end index of the span\n",
1002
- " for k in range(i, j): # split point\n",
1003
- " for rule in pcfg:\n",
1004
- " if len(rule) == 3: # binary rule like S -> NP VP\n",
1005
- " A, B, C = rule # A -> B C\n",
1006
- " if (i, k, B) in table and (k + 1, j, C) in table:\n",
1007
- " prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
1008
- " if prob > table[(i, j, A)]:\n",
1009
- " table[(i, j, A)] = prob\n",
1010
- " backpointer[(i, j, A)] = (B, C, i, k, j)\n",
1011
- "\n",
1012
- " # Step 3: Return the final result for the whole sentence as an 'S'\n",
1013
- " return table[(0, n-1, 'S')], backpointer\n",
1014
- "\n",
1015
- "# Function to build the parse tree from the backpointer\n",
1016
- "def build_parse_tree(backpointer, i, j, A):\n",
1017
- " if (i, j, A) not in backpointer or backpointer[(i, j, A)] is None:\n",
1018
- " return A # Base case: return the non-terminal if no children\n",
1019
- "\n",
1020
- " B, C, left_start, split, right_end = backpointer[(i, j, A)]\n",
1021
- " left_tree = build_parse_tree(backpointer, left_start, split, B)\n",
1022
- " right_tree = build_parse_tree(backpointer, split + 1, right_end, C)\n",
1023
- " return f'({A} {left_tree} {right_tree})'\n",
1024
- "\n",
1025
- "# Run the CYK algorithm and get the final probability and backpointer\n",
1026
- "final_prob, backpointer = cyk_algorithm(pcfg, sentence)\n",
1027
- "\n",
1028
- "# Print the final probability of the sentence\n",
1029
- "if final_prob > 0:\n",
1030
- " print(f\"Final Probability of the sentence: {final_prob:.6f}\")\n",
1031
- " parse_tree = build_parse_tree(backpointer, 0, len(sentence) - 1, 'S')\n",
1032
- " print(\"Parse Tree:\", parse_tree)\n",
1033
- "else:\n",
1034
- " print(\"The sentence could not be parsed with the given grammar.\")"
1035
- ]
1036
- },
1037
- {
1038
- "cell_type": "code",
1039
- "execution_count": null,
1040
- "id": "c4a04fbc-be02-4f88-9147-7b2b4497ace3",
1041
- "metadata": {},
1042
- "outputs": [],
1043
- "source": []
1044
- }
1045
- ],
1046
- "metadata": {
1047
- "kernelspec": {
1048
- "display_name": "Python 3 (ipykernel)",
1049
- "language": "python",
1050
- "name": "python3"
1051
- },
1052
- "language_info": {
1053
- "codemirror_mode": {
1054
- "name": "ipython",
1055
- "version": 3
1056
- },
1057
- "file_extension": ".py",
1058
- "mimetype": "text/x-python",
1059
- "name": "python",
1060
- "nbconvert_exporter": "python",
1061
- "pygments_lexer": "ipython3",
1062
- "version": "3.11.7"
1063
- }
1064
- },
1065
- "nbformat": 4,
1066
- "nbformat_minor": 5
1067
- }