noshot 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/__init__.py +1 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +112 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +111 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +115 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +123 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/2_ANOVA.csv +769 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +126 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Two Way ANOVA.ipynb +138 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/reaction_time.csv +5 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sample_data.csv +16 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sleep_deprivation.csv +4 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/3_Linear.csv +4802 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +113 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression.ipynb +148 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/house_rate.csv +22 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +128 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression.ipynb +145 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/default.csv +1001 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/hours_scores_records.csv +101 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +256 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +157 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +178 -0
- noshot/data/AIDS CN NLP/AIDS/3. Genetic Algorithm/Genetic.ipynb +95 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +74 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +103 -0
- noshot/data/AIDS CN NLP/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +182 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +120 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +125 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/Random Sampling.ipynb +73 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test Hash Function.ipynb +141 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test.ipynb +151 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/1_heart.csv +304 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Independent T Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Paired T Test.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test Hash Function.ipynb +142 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test.ipynb +158 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/iq_test.csv +0 -0
- noshot/data/AIDS CN NLP/AIDS/Others (AllinOne)/All In One.ipynb +4581 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/chat.java +81 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +65 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +44 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +229 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/file_to_send.txt +2 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/filetransfer.java +119 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/rmi.java +56 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.awk +25 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.tcl +81 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.awk +27 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.tcl +153 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +28 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +78 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +79 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +163 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/DV.tcl +111 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/LS.tcl +106 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/analysis.awk +36 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/analysis.awk +20 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/broadcast.tcl +76 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/multicast.tcl +103 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/DHCP.java +125 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/procedure.png +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/1-Prereqs.py +18 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-Chi2test.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-T-test.py +79 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/3-WSD-nb.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/4-Hindle-Rooth.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/5-HMM-Trellis.py +82 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/6-HMM-Viterbi.py +16 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/7-PCFG-parsetree.py +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Chi2test.ipynb +285 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Hindle-Rooth.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/PCFG.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Prereqs.ipynb +131 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/T test.ipynb +252 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/TFIDF BOW.ipynb +171 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Trellis.ipynb +244 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/WSD.ipynb +645 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Word2Vec.ipynb +93 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +370 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +274 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +905 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/test.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +272 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +549 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +817 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +231 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +507 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +255 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +159 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +282 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +670 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +613 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +74 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +480 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +445 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +105 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +87 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +11 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +201 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backward-Procedure.ipynb +597 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Bag_of.ipynb +1422 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/CYK-algorithm.ipynb +1067 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Forward-Procedure.ipynb +477 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/LSTM.ipynb +1290 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-4.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-5.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/abc.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-1-nltk.ipynb +711 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-2-nlp.ipynb +267 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/exp8&9.ipynb +305 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/hind.ipynb +287 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/lab66.ipynb +752 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/leb_3.ipynb +612 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_1.ipynb +3008 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_2.ipynb +3095 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlplab-9.ipynb +295 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nltk-ex-4.ipynb +506 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text1.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text2.txt +8 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text3.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/translation-rnn.ipynb +812 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/word2vector.ipynb +173 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Backward Procedure Algorithm.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Chi Square Collocation.ipynb +208 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Collocation (T test).ipynb +188 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Experiment 1.ipynb +437 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Forward Procedure Algorithm.ipynb +132 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Hindle Rooth.ipynb +414 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/MachineTranslation.ipynb +368 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +86 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +112 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/PCFG Inside Probability.ipynb +451 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Text Generation using LSTM.ipynb +297 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Viterbi.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Word Sense Disambiguation.ipynb +335 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/10.Text Generation using LSTM.ipynb +316 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/11.Machine Translation.ipynb +868 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/2.T and Chi2 Test.ipynb +204 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +234 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/4.Hinddle and Rooth.ipynb +128 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/5.Forward and Backward.ipynb +149 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/6.Viterbi.ipynb +111 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG Parse Tree.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG using cyk.ipynb +101 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/9.Word2Vector.ipynb +78 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/NLP ALL In One.ipynb +2619 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample1.txt +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample2.txt +4 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/word2vec_model.bin +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +312 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/3. Naive Bayes WSD.ipynb +199 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/4. Hinddle and Rooth.ipynb +151 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +164 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/7. PCFG using CYK.ipynb +383 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/8. BOW and TF-IDF.ipynb +252 -0
- noshot/data/AIDS CN NLP/Ubuntu CN Lab.iso +0 -0
- noshot/main.py +47 -0
- noshot-0.1.0.dist-info/LICENSE.txt +21 -0
- noshot-0.1.0.dist-info/METADATA +65 -0
- noshot-0.1.0.dist-info/RECORD +210 -0
- noshot-0.1.0.dist-info/WHEEL +5 -0
- noshot-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1067 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": 1,
|
6
|
+
"id": "465112ff-5cd0-4b7b-9722-da197d0593d7",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [
|
9
|
+
{
|
10
|
+
"name": "stdout",
|
11
|
+
"output_type": "stream",
|
12
|
+
"text": [
|
13
|
+
"Span 0-0 -> NP: Probability = 0.100000\n",
|
14
|
+
"Span 1-1 -> V: Probability = 1.000000\n",
|
15
|
+
"Span 2-2 -> NP: Probability = 0.180000\n",
|
16
|
+
"Span 3-3 -> P: Probability = 1.000000\n",
|
17
|
+
"Span 4-4 -> NP: Probability = 0.180000\n",
|
18
|
+
"Span 1-2 -> VP: Probability = 0.126000\n",
|
19
|
+
"Span 3-4 -> PP: Probability = 0.180000\n",
|
20
|
+
"Span 0-2 -> S: Probability = 0.012600\n",
|
21
|
+
"Span 1-4 -> VP: Probability = 0.006804\n",
|
22
|
+
"Span 0-4 -> S: Probability = 0.000680\n"
|
23
|
+
]
|
24
|
+
}
|
25
|
+
],
|
26
|
+
"source": [
|
27
|
+
"from collections import defaultdict\n",
|
28
|
+
"\n",
|
29
|
+
"# Grammar rules with probabilities\n",
|
30
|
+
"pcfg = {\n",
|
31
|
+
" ('S', 'NP', 'VP'): 1.0,\n",
|
32
|
+
" ('VP', 'V', 'NP'): 0.7,\n",
|
33
|
+
" ('VP', 'VP', 'PP'): 0.3,\n",
|
34
|
+
" ('PP', 'P', 'NP'): 1.0,\n",
|
35
|
+
" ('NP', 'astronomers'): 0.1,\n",
|
36
|
+
" ('NP', 'ears'): 0.18,\n",
|
37
|
+
" ('NP', 'stars'): 0.18,\n",
|
38
|
+
" ('NP', 'telescopes'): 0.18,\n",
|
39
|
+
" ('V', 'saw'): 1.0,\n",
|
40
|
+
" ('P', 'with'): 1.0\n",
|
41
|
+
"}\n",
|
42
|
+
"\n",
|
43
|
+
"# The sentence we want to parse\n",
|
44
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
45
|
+
"\n",
|
46
|
+
"# Function to perform CYK algorithm\n",
|
47
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
48
|
+
" n = len(sentence)\n",
|
49
|
+
" # Create a table to store probabilities\n",
|
50
|
+
" table = defaultdict(float)\n",
|
51
|
+
" \n",
|
52
|
+
" # Initialize for the single words (length 1 spans)\n",
|
53
|
+
" for i, word in enumerate(sentence):\n",
|
54
|
+
" for rule in pcfg:\n",
|
55
|
+
" if len(rule) == 2 and rule[1] == word:\n",
|
56
|
+
" table[(i, i, rule[0])] = pcfg[rule]\n",
|
57
|
+
" \n",
|
58
|
+
" # Filling the table for larger spans (length > 1)\n",
|
59
|
+
" for span in range(2, n+1): # span length\n",
|
60
|
+
" for i in range(n - span + 1): # starting point of the span\n",
|
61
|
+
" j = i + span - 1 # ending point of the span\n",
|
62
|
+
" for k in range(i, j): # split point\n",
|
63
|
+
" for rule in pcfg:\n",
|
64
|
+
" if len(rule) == 3: # binary rules\n",
|
65
|
+
" A, B, C = rule\n",
|
66
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
67
|
+
" prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
|
68
|
+
" if prob > table[(i, j, A)]:\n",
|
69
|
+
" table[(i, j, A)] = prob\n",
|
70
|
+
"\n",
|
71
|
+
" # Return the table holding the highest probability found for each (start, end, non-terminal)\n",
|
72
|
+
" return table\n",
|
73
|
+
"\n",
|
74
|
+
"# Run the CYK algorithm\n",
|
75
|
+
"table = cyk_algorithm(pcfg, sentence)\n",
|
76
|
+
"\n",
|
77
|
+
"# Print the resulting probabilities\n",
|
78
|
+
"for key, prob in table.items():\n",
|
79
|
+
" print(f\"Span {key[0]}-{key[1]} -> {key[2]}: Probability = {prob:.6f}\")\n",
|
80
|
+
"\n"
|
81
|
+
]
|
82
|
+
},
|
83
|
+
{
|
84
|
+
"cell_type": "code",
|
85
|
+
"execution_count": 2,
|
86
|
+
"id": "693d06f1-9161-44e5-9c9d-98c9da850b67",
|
87
|
+
"metadata": {},
|
88
|
+
"outputs": [
|
89
|
+
{
|
90
|
+
"name": "stdout",
|
91
|
+
"output_type": "stream",
|
92
|
+
"text": [
|
93
|
+
"Inside probability of the sequence the cat eats: 0.018000000000000002\n"
|
94
|
+
]
|
95
|
+
}
|
96
|
+
],
|
97
|
+
"source": [
|
98
|
+
"import numpy as np\n",
|
99
|
+
"from collections import defaultdict\n",
|
100
|
+
"\n",
|
101
|
+
"class PCFG:\n",
|
102
|
+
" def __init__(self):\n",
|
103
|
+
" # Non-terminal production rules and their probabilities\n",
|
104
|
+
" self.productions = defaultdict(list)\n",
|
105
|
+
" self.terminals = defaultdict(list)\n",
|
106
|
+
"\n",
|
107
|
+
" def add_production(self, lhs, rhs, prob):\n",
|
108
|
+
" \"\"\" Adds a production rule with its probability \"\"\"\n",
|
109
|
+
" if len(rhs) == 1 and rhs[0].islower(): # Terminal rule\n",
|
110
|
+
" self.terminals[rhs[0]].append((lhs, prob))\n",
|
111
|
+
" else: # Non-terminal rule\n",
|
112
|
+
" self.productions[lhs].append((rhs, prob))\n",
|
113
|
+
"\n",
|
114
|
+
"def cyk_pcfg(pcfg, words):\n",
|
115
|
+
" \"\"\" Applies the CYK algorithm to find the inside probability of a word sequence \"\"\"\n",
|
116
|
+
" n = len(words)\n",
|
117
|
+
" non_terminals = list(pcfg.productions.keys())\n",
|
118
|
+
" \n",
|
119
|
+
" # Initialize a 3D table for inside probabilities\n",
|
120
|
+
" P = defaultdict(lambda: np.zeros((n, n)))\n",
|
121
|
+
" \n",
|
122
|
+
" # Fill the diagonal with terminal production probabilities\n",
|
123
|
+
" for i, word in enumerate(words):\n",
|
124
|
+
" if word in pcfg.terminals:\n",
|
125
|
+
" for lhs, prob in pcfg.terminals[word]:\n",
|
126
|
+
" P[lhs][i, i] = prob\n",
|
127
|
+
"\n",
|
128
|
+
" # Fill the table for subsequences\n",
|
129
|
+
" for span in range(2, n + 1): # span length from 2 to n\n",
|
130
|
+
" for i in range(n - span + 1):\n",
|
131
|
+
" j = i + span - 1\n",
|
132
|
+
" for k in range(i, j): # midpoint\n",
|
133
|
+
" for lhs in non_terminals:\n",
|
134
|
+
" for rhs, prob in pcfg.productions[lhs]:\n",
|
135
|
+
" if len(rhs) == 2:\n",
|
136
|
+
" left, right = rhs\n",
|
137
|
+
" P[lhs][i, j] += prob * P[left][i, k] * P[right][k + 1, j]\n",
|
138
|
+
"\n",
|
139
|
+
" # The inside probability for the start symbol S to derive the entire sequence\n",
|
140
|
+
" return P['S'][0, n - 1]\n",
|
141
|
+
"\n",
|
142
|
+
"# Example Usage:\n",
|
143
|
+
"\n",
|
144
|
+
"# Define a PCFG\n",
|
145
|
+
"pcfg = PCFG()\n",
|
146
|
+
"pcfg.add_production('S', ['NP', 'VP'], 0.9)\n",
|
147
|
+
"pcfg.add_production('S', ['VP'], 0.1)\n",
|
148
|
+
"pcfg.add_production('NP', ['Det', 'N'], 0.5)\n",
|
149
|
+
"pcfg.add_production('VP', ['V', 'NP'], 0.5)\n",
|
150
|
+
"pcfg.add_production('VP', ['eats'], 0.1)\n",
|
151
|
+
"pcfg.add_production('Det', ['the'], 0.8)\n",
|
152
|
+
"pcfg.add_production('N', ['cat'], 0.5)\n",
|
153
|
+
"pcfg.add_production('N', ['food'], 0.5)\n",
|
154
|
+
"pcfg.add_production('V', ['eats'], 1.0)\n",
|
155
|
+
"\n",
|
156
|
+
"# Example word sequence\n",
|
157
|
+
"words = ['the', 'cat', 'eats']\n",
|
158
|
+
"\n",
|
159
|
+
"# Calculate inside probability using CYK algorithm\n",
|
160
|
+
"inside_prob = cyk_pcfg(pcfg, words)\n",
|
161
|
+
"\n",
|
162
|
+
"print(f\"Inside probability of the sequence {' '.join(words)}: {inside_prob}\")\n",
|
163
|
+
"\n"
|
164
|
+
]
|
165
|
+
},
|
166
|
+
{
|
167
|
+
"cell_type": "code",
|
168
|
+
"execution_count": 3,
|
169
|
+
"id": "92685c0a-36e5-4092-b415-17a39190bf31",
|
170
|
+
"metadata": {},
|
171
|
+
"outputs": [
|
172
|
+
{
|
173
|
+
"name": "stdout",
|
174
|
+
"output_type": "stream",
|
175
|
+
"text": [
|
176
|
+
"Final Probability of the sentence: 0.000680\n"
|
177
|
+
]
|
178
|
+
}
|
179
|
+
],
|
180
|
+
"source": [
|
181
|
+
"from collections import defaultdict\n",
|
182
|
+
"\n",
|
183
|
+
"# Grammar rules with probabilities\n",
|
184
|
+
"pcfg = {\n",
|
185
|
+
" ('S', 'NP', 'VP'): 1.0,\n",
|
186
|
+
" ('VP', 'V', 'NP'): 0.7,\n",
|
187
|
+
" ('VP', 'VP', 'PP'): 0.3,\n",
|
188
|
+
" ('PP', 'P', 'NP'): 1.0,\n",
|
189
|
+
" ('NP', 'astronomers'): 0.1,\n",
|
190
|
+
" ('NP', 'ears'): 0.18,\n",
|
191
|
+
" ('NP', 'stars'): 0.18,\n",
|
192
|
+
" ('NP', 'telescopes'): 0.18,\n",
|
193
|
+
" ('V', 'saw'): 1.0,\n",
|
194
|
+
" ('P', 'with'): 1.0\n",
|
195
|
+
"}\n",
|
196
|
+
"\n",
|
197
|
+
"# The sentence we want to parse\n",
|
198
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
199
|
+
"\n",
|
200
|
+
"# Function to perform CYK algorithm\n",
|
201
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
202
|
+
" n = len(sentence)\n",
|
203
|
+
" # Create a table to store probabilities\n",
|
204
|
+
" table = defaultdict(float)\n",
|
205
|
+
" \n",
|
206
|
+
" # Initialize for the single words (length 1 spans)\n",
|
207
|
+
" for i, word in enumerate(sentence):\n",
|
208
|
+
" for rule in pcfg:\n",
|
209
|
+
" if len(rule) == 2 and rule[1] == word:\n",
|
210
|
+
" table[(i, i, rule[0])] = pcfg[rule]\n",
|
211
|
+
" \n",
|
212
|
+
" # Filling the table for larger spans (length > 1)\n",
|
213
|
+
" for span in range(2, n+1): # span length\n",
|
214
|
+
" for i in range(n - span + 1): # starting point of the span\n",
|
215
|
+
" j = i + span - 1 # ending point of the span\n",
|
216
|
+
" for k in range(i, j): # split point\n",
|
217
|
+
" for rule in pcfg:\n",
|
218
|
+
" if len(rule) == 3: # binary rules\n",
|
219
|
+
" A, B, C = rule\n",
|
220
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
221
|
+
" prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
|
222
|
+
" if prob > table[(i, j, A)]:\n",
|
223
|
+
" table[(i, j, A)] = prob\n",
|
224
|
+
"\n",
|
225
|
+
" # Return the final result for the whole sentence\n",
|
226
|
+
" return table[(0, n-1, 'S')] # The probability of the sentence being an S (sentence)\n",
|
227
|
+
"\n",
|
228
|
+
"# Run the CYK algorithm\n",
|
229
|
+
"final_prob = cyk_algorithm(pcfg, sentence)\n",
|
230
|
+
"\n",
|
231
|
+
"# Print the final probability of the sentence\n",
|
232
|
+
"print(f\"Final Probability of the sentence: {final_prob:.6f}\")\n"
|
233
|
+
]
|
234
|
+
},
|
235
|
+
{
|
236
|
+
"cell_type": "code",
|
237
|
+
"execution_count": 9,
|
238
|
+
"id": "12d3ba54-fec7-492b-b967-371f607b5f1d",
|
239
|
+
"metadata": {},
|
240
|
+
"outputs": [
|
241
|
+
{
|
242
|
+
"name": "stdout",
|
243
|
+
"output_type": "stream",
|
244
|
+
"text": [
|
245
|
+
"Final Probability of the sentence: 0.000680\n"
|
246
|
+
]
|
247
|
+
}
|
248
|
+
],
|
249
|
+
"source": [
|
250
|
+
"from collections import defaultdict\n",
|
251
|
+
"\n",
|
252
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
253
|
+
"pcfg = {\n",
|
254
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
255
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
256
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
257
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
258
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
259
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
260
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
261
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
262
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
263
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
264
|
+
"}\n",
|
265
|
+
"\n",
|
266
|
+
"# The sentence we want to parse\n",
|
267
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
268
|
+
"\n",
|
269
|
+
"# Function to perform the CYK algorithm, keeping the highest (Viterbi) probability for each span\n",
|
270
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
271
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
272
|
+
" \n",
|
273
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> probability\n",
|
274
|
+
" table = defaultdict(float)\n",
|
275
|
+
" \n",
|
276
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
277
|
+
" for i, word in enumerate(sentence):\n",
|
278
|
+
" for rule in pcfg:\n",
|
279
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
280
|
+
" table[(i, i, rule[0])] = pcfg[rule]\n",
|
281
|
+
" \n",
|
282
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
283
|
+
" for span in range(2, n + 1): # span length\n",
|
284
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
285
|
+
" j = i + span - 1 # end index of the span\n",
|
286
|
+
" for k in range(i, j): # split point\n",
|
287
|
+
" for rule in pcfg:\n",
|
288
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
289
|
+
" A, B, C = rule # A -> B C\n",
|
290
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
291
|
+
" prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
|
292
|
+
" if prob > table[(i, j, A)]:\n",
|
293
|
+
" table[(i, j, A)] = prob\n",
|
294
|
+
"\n",
|
295
|
+
" # Step 3: Return the final result for the whole sentence as an 'S' (complete sentence)\n",
|
296
|
+
" # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
|
297
|
+
" return table[(0, n-1, 'S')] # Probability of the whole sentence being an S (sentence)\n",
|
298
|
+
"\n",
|
299
|
+
"# Run the CYK algorithm and get the final probability\n",
|
300
|
+
"final_prob = cyk_algorithm(pcfg, sentence)\n",
|
301
|
+
"\n",
|
302
|
+
"# Print the final probability of the sentence\n",
|
303
|
+
"if final_prob > 0:\n",
|
304
|
+
" print(f\"Final Probability of the sentence: {final_prob:.6f}\")\n",
|
305
|
+
"else:\n",
|
306
|
+
" print(\"The sentence could not be parsed with the given grammar.\")"
|
307
|
+
]
|
308
|
+
},
|
309
|
+
{
|
310
|
+
"cell_type": "code",
|
311
|
+
"execution_count": 11,
|
312
|
+
"id": "acadf0b1-acd3-420e-9ea8-04ec2046b694",
|
313
|
+
"metadata": {},
|
314
|
+
"outputs": [
|
315
|
+
{
|
316
|
+
"name": "stdout",
|
317
|
+
"output_type": "stream",
|
318
|
+
"text": [
|
319
|
+
"Parse t1: Probability = 0.000680, Derivation = ('NP', 'astronomers', 'VP', ('VP', ('V', 'saw', 'NP', 'stars'), 'PP', ('P', 'with', 'NP', 'ears')))\n"
|
320
|
+
]
|
321
|
+
}
|
322
|
+
],
|
323
|
+
"source": [
|
324
|
+
"from collections import defaultdict\n",
|
325
|
+
"\n",
|
326
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
327
|
+
"pcfg = {\n",
|
328
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
329
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
330
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
331
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
332
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
333
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
334
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
335
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
336
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
337
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
338
|
+
"}\n",
|
339
|
+
"\n",
|
340
|
+
"# The sentence we want to parse\n",
|
341
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
342
|
+
"\n",
|
343
|
+
"# Function to perform the CYK algorithm and collect every parse with its probability and derivation\n",
|
344
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
345
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
346
|
+
" \n",
|
347
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
|
348
|
+
" table = defaultdict(list)\n",
|
349
|
+
" \n",
|
350
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
351
|
+
" for i, word in enumerate(sentence):\n",
|
352
|
+
" for rule in pcfg:\n",
|
353
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
354
|
+
" table[(i, i, rule[0])].append((pcfg[rule], word))\n",
|
355
|
+
" \n",
|
356
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
357
|
+
" for span in range(2, n + 1): # span length\n",
|
358
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
359
|
+
" j = i + span - 1 # end index of the span\n",
|
360
|
+
" for k in range(i, j): # split point\n",
|
361
|
+
" for rule in pcfg:\n",
|
362
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
363
|
+
" A, B, C = rule # A -> B C\n",
|
364
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
365
|
+
" for prob1, derivation1 in table[(i, k, B)]:\n",
|
366
|
+
" for prob2, derivation2 in table[(k + 1, j, C)]:\n",
|
367
|
+
" prob = prob1 * prob2 * pcfg[rule]\n",
|
368
|
+
" table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
|
369
|
+
"\n",
|
370
|
+
" # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
|
371
|
+
" # Every parse of the entire sentence as an S is collected in table[(0, n-1, 'S')]\n",
|
372
|
+
" return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
|
373
|
+
"\n",
|
374
|
+
"# Run the CYK algorithm and get all possible parses\n",
|
375
|
+
"parses = cyk_algorithm(pcfg, sentence)\n",
|
376
|
+
"\n",
|
377
|
+
"# Print the final probabilities and derivations of the sentence\n",
|
378
|
+
"if parses:\n",
|
379
|
+
" for idx, (prob, derivation) in enumerate(parses, start=1):\n",
|
380
|
+
" print(f\"Parse t{idx}: Probability = {prob:.6f}, Derivation = {derivation}\")\n",
|
381
|
+
"else:\n",
|
382
|
+
" print(\"The sentence could not be parsed with the given grammar.\")\n"
|
383
|
+
]
|
384
|
+
},
|
385
|
+
{
|
386
|
+
"cell_type": "code",
|
387
|
+
"execution_count": 14,
|
388
|
+
"id": "990ba565-bf59-49b3-aef9-355a6785e6dc",
|
389
|
+
"metadata": {},
|
390
|
+
"outputs": [
|
391
|
+
{
|
392
|
+
"name": "stdout",
|
393
|
+
"output_type": "stream",
|
394
|
+
"text": [
|
395
|
+
"Parse t1: Probability = 0.000680, Derivation = ('NP', 'astronomers', 'VP', ('VP', ('V', 'saw', 'NP', 'stars'), 'PP', ('P', 'with', 'NP', 'ears')))\n"
|
396
|
+
]
|
397
|
+
}
|
398
|
+
],
|
399
|
+
"source": [
|
400
|
+
"from collections import defaultdict\n",
|
401
|
+
"\n",
|
402
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
403
|
+
"pcfg = {\n",
|
404
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
405
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
406
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
407
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
408
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
409
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
410
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
411
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
412
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
413
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
414
|
+
"}\n",
|
415
|
+
"\n",
|
416
|
+
"# The sentence we want to parse\n",
|
417
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
418
|
+
"\n",
|
419
|
+
"# Function to run probabilistic CYK and list every derivation of the sentence with its own probability\n",
|
420
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
421
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
422
|
+
" \n",
|
423
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
|
424
|
+
" table = defaultdict(list)\n",
|
425
|
+
" \n",
|
426
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
427
|
+
" for i, word in enumerate(sentence):\n",
|
428
|
+
" for rule in pcfg:\n",
|
429
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
430
|
+
" table[(i, i, rule[0])].append((pcfg[rule], word))\n",
|
431
|
+
" \n",
|
432
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
433
|
+
" for span in range(2, n + 1): # span length\n",
|
434
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
435
|
+
" j = i + span - 1 # end index of the span\n",
|
436
|
+
" for k in range(i, j): # split point\n",
|
437
|
+
" for rule in pcfg:\n",
|
438
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
439
|
+
" A, B, C = rule # A -> B C\n",
|
440
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
441
|
+
" for prob1, derivation1 in table[(i, k, B)]:\n",
|
442
|
+
" for prob2, derivation2 in table[(k + 1, j, C)]:\n",
|
443
|
+
" prob = prob1 * prob2 * pcfg[rule]\n",
|
444
|
+
" table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
|
445
|
+
"\n",
|
446
|
+
" # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
|
447
|
+
" # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
|
448
|
+
" return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
|
449
|
+
"\n",
|
450
|
+
"# Run the CYK algorithm and get all possible parses\n",
|
451
|
+
"parses = cyk_algorithm(pcfg, sentence)\n",
|
452
|
+
"\n",
|
453
|
+
"# Print the final probabilities and derivations of the sentence\n",
|
454
|
+
"if parses:\n",
|
455
|
+
" for idx, (prob, derivation) in enumerate(parses, start=1):\n",
|
456
|
+
" print(f\"Parse t{idx}: Probability = {prob:.6f}, Derivation = {derivation}\")\n",
|
457
|
+
"else:\n",
|
458
|
+
" print(\"The sentence could not be parsed with the given grammar.\")\n"
|
459
|
+
]
|
460
|
+
},
|
461
|
+
{
|
462
|
+
"cell_type": "code",
|
463
|
+
"execution_count": 19,
|
464
|
+
"id": "03b70885-e274-4f15-b617-9ce8cbea6ff9",
|
465
|
+
"metadata": {},
|
466
|
+
"outputs": [
|
467
|
+
{
|
468
|
+
"name": "stdout",
|
469
|
+
"output_type": "stream",
|
470
|
+
"text": [
|
471
|
+
"Parse t1: Probability = 0.000680, Derivation = ('NP', 'astronomers', 'VP', ('VP', ('V', 'saw', 'NP', 'stars'), 'PP', ('P', 'with', 'NP', 'ears')))\n"
|
472
|
+
]
|
473
|
+
}
|
474
|
+
],
|
475
|
+
"source": [
|
476
|
+
"from collections import defaultdict\n",
|
477
|
+
"\n",
|
478
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
479
|
+
"pcfg = {\n",
|
480
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
481
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
482
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
483
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
484
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
485
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
486
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
487
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
488
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
489
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
490
|
+
"}\n",
|
491
|
+
"\n",
|
492
|
+
"# The sentence we want to parse\n",
|
493
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
494
|
+
"\n",
|
495
|
+
"# Function to run probabilistic CYK and list every derivation of the sentence with its own probability\n",
|
496
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
497
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
498
|
+
" \n",
|
499
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
|
500
|
+
" table = defaultdict(list)\n",
|
501
|
+
" \n",
|
502
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
503
|
+
" for i, word in enumerate(sentence):\n",
|
504
|
+
" for rule in pcfg:\n",
|
505
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
506
|
+
" table[(i, i, rule[0])].append((pcfg[rule], word))\n",
|
507
|
+
" \n",
|
508
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
509
|
+
" for span in range(2, n + 1): # span length\n",
|
510
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
511
|
+
" j = i + span - 1 # end index of the span\n",
|
512
|
+
" for k in range(i, j): # split point\n",
|
513
|
+
" for rule in pcfg:\n",
|
514
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
515
|
+
" A, B, C = rule # A -> B C\n",
|
516
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
517
|
+
" for prob1, derivation1 in table[(i, k, B)]:\n",
|
518
|
+
" for prob2, derivation2 in table[(k + 1, j, C)]:\n",
|
519
|
+
" prob = prob1 * prob2 * pcfg[rule]\n",
|
520
|
+
" table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
|
521
|
+
"\n",
|
522
|
+
" # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
|
523
|
+
" # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
|
524
|
+
" return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
|
525
|
+
"\n",
|
526
|
+
"# Run the CYK algorithm and get all possible parses\n",
|
527
|
+
"parses = cyk_algorithm(pcfg, sentence)\n",
|
528
|
+
"\n",
|
529
|
+
"# Print the final probabilities and derivations of the sentence\n",
|
530
|
+
"if parses:\n",
|
531
|
+
" for idx, (prob, derivation) in enumerate(parses, start=1):\n",
|
532
|
+
" print(f\"Parse t{idx}: Probability = {prob:.6f}, Derivation = {derivation}\")\n",
|
533
|
+
" #print(parses)\n",
|
534
|
+
"else:\n",
|
535
|
+
" print(\"The sentence could not be parsed with the given grammar.\")\n"
|
536
|
+
]
|
537
|
+
},
|
538
|
+
{
|
539
|
+
"cell_type": "code",
|
540
|
+
"execution_count": 1,
|
541
|
+
"id": "a36738d7-23a5-4a27-a58e-b0df0ee7132b",
|
542
|
+
"metadata": {},
|
543
|
+
"outputs": [
|
544
|
+
{
|
545
|
+
"name": "stdout",
|
546
|
+
"output_type": "stream",
|
547
|
+
"text": [
|
548
|
+
"Parse t1: Probability = 0.000680\n",
|
549
|
+
"(NP\n",
|
550
|
+
" astronomers\n",
|
551
|
+
" (VP\n",
|
552
|
+
" (V\n",
|
553
|
+
" saw\n",
|
554
|
+
" stars\n",
|
555
|
+
" )\n",
|
556
|
+
" (P\n",
|
557
|
+
" with\n",
|
558
|
+
" ears\n",
|
559
|
+
" )\n",
|
560
|
+
" )\n",
|
561
|
+
")\n",
|
562
|
+
"\n"
|
563
|
+
]
|
564
|
+
}
|
565
|
+
],
|
566
|
+
"source": [
|
567
|
+
"from collections import defaultdict\n",
|
568
|
+
"\n",
|
569
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
570
|
+
"pcfg = {\n",
|
571
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
572
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
573
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
574
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
575
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
576
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
577
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
578
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
579
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
580
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
581
|
+
"}\n",
|
582
|
+
"\n",
|
583
|
+
"# The sentence we want to parse\n",
|
584
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
585
|
+
"\n",
|
586
|
+
"# Function to run probabilistic CYK and list every derivation of the sentence with its own probability\n",
|
587
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
588
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
589
|
+
" \n",
|
590
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
|
591
|
+
" table = defaultdict(list)\n",
|
592
|
+
" \n",
|
593
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
594
|
+
" for i, word in enumerate(sentence):\n",
|
595
|
+
" for rule in pcfg:\n",
|
596
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
597
|
+
" table[(i, i, rule[0])].append((pcfg[rule], word))\n",
|
598
|
+
" \n",
|
599
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
600
|
+
" for span in range(2, n + 1): # span length\n",
|
601
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
602
|
+
" j = i + span - 1 # end index of the span\n",
|
603
|
+
" for k in range(i, j): # split point\n",
|
604
|
+
" for rule in pcfg:\n",
|
605
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
606
|
+
" A, B, C = rule # A -> B C\n",
|
607
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
608
|
+
" for prob1, derivation1 in table[(i, k, B)]:\n",
|
609
|
+
" for prob2, derivation2 in table[(k + 1, j, C)]:\n",
|
610
|
+
" prob = prob1 * prob2 * pcfg[rule]\n",
|
611
|
+
" table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
|
612
|
+
"\n",
|
613
|
+
" # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
|
614
|
+
" # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
|
615
|
+
" return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
|
616
|
+
"\n",
|
617
|
+
"# Helper function to print the parse tree in a readable format\n",
|
618
|
+
"def print_parse_tree(derivation, indent=0):\n",
|
619
|
+
" if isinstance(derivation, tuple):\n",
|
620
|
+
" A, derivation1, B, derivation2 = derivation\n",
|
621
|
+
" print(' ' * indent + f\"({A}\")\n",
|
622
|
+
" print_parse_tree(derivation1, indent + 2)\n",
|
623
|
+
" print_parse_tree(derivation2, indent + 2)\n",
|
624
|
+
" print(' ' * indent + f\")\")\n",
|
625
|
+
" else:\n",
|
626
|
+
" print(' ' * indent + derivation)\n",
|
627
|
+
"\n",
|
628
|
+
"# Run the CYK algorithm and get all possible parses\n",
|
629
|
+
"parses = cyk_algorithm(pcfg, sentence)\n",
|
630
|
+
"\n",
|
631
|
+
"# Print the final probabilities and derivations of the sentence\n",
|
632
|
+
"if parses:\n",
|
633
|
+
" for idx, (prob, derivation) in enumerate(parses, start=1):\n",
|
634
|
+
" print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
|
635
|
+
" print_parse_tree(derivation)\n",
|
636
|
+
" print() # Print a blank line between parses\n",
|
637
|
+
"else:\n",
|
638
|
+
" print(\"The sentence could not be parsed with the given grammar.\")\n"
|
639
|
+
]
|
640
|
+
},
|
641
|
+
{
|
642
|
+
"cell_type": "code",
|
643
|
+
"execution_count": 2,
|
644
|
+
"id": "ab75fa9e-9ab1-43f5-9045-e1d7354b4aaa",
|
645
|
+
"metadata": {},
|
646
|
+
"outputs": [
|
647
|
+
{
|
648
|
+
"name": "stdout",
|
649
|
+
"output_type": "stream",
|
650
|
+
"text": [
|
651
|
+
"Parse t1: Probability = 0.000680\n",
|
652
|
+
"(NP\n",
|
653
|
+
" astronomers\n",
|
654
|
+
" (VP\n",
|
655
|
+
" (V\n",
|
656
|
+
" saw\n",
|
657
|
+
" stars\n",
|
658
|
+
" )\n",
|
659
|
+
" (P\n",
|
660
|
+
" with\n",
|
661
|
+
" ears\n",
|
662
|
+
" )\n",
|
663
|
+
" )\n",
|
664
|
+
")\n",
|
665
|
+
"\n"
|
666
|
+
]
|
667
|
+
}
|
668
|
+
],
|
669
|
+
"source": [
|
670
|
+
"from collections import defaultdict\n",
|
671
|
+
"\n",
|
672
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
673
|
+
"pcfg = {\n",
|
674
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
675
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
676
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
677
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
678
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
679
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
680
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
681
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
682
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
683
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
684
|
+
"}\n",
|
685
|
+
"\n",
|
686
|
+
"# The sentence we want to parse\n",
|
687
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
688
|
+
"\n",
|
689
|
+
"# Function to run probabilistic CYK and list every derivation of the sentence with its own probability\n",
|
690
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
691
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
692
|
+
" \n",
|
693
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
|
694
|
+
" table = defaultdict(list)\n",
|
695
|
+
" \n",
|
696
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
697
|
+
" for i, word in enumerate(sentence):\n",
|
698
|
+
" for rule in pcfg:\n",
|
699
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
700
|
+
" table[(i, i, rule[0])].append((pcfg[rule], word))\n",
|
701
|
+
" \n",
|
702
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
703
|
+
" for span in range(2, n + 1): # span length\n",
|
704
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
705
|
+
" j = i + span - 1 # end index of the span\n",
|
706
|
+
" for k in range(i, j): # split point\n",
|
707
|
+
" for rule in pcfg:\n",
|
708
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
709
|
+
" A, B, C = rule # A -> B C\n",
|
710
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
711
|
+
" for prob1, derivation1 in table[(i, k, B)]:\n",
|
712
|
+
" for prob2, derivation2 in table[(k + 1, j, C)]:\n",
|
713
|
+
" prob = prob1 * prob2 * pcfg[rule]\n",
|
714
|
+
" table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
|
715
|
+
"\n",
|
716
|
+
" # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
|
717
|
+
" # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
|
718
|
+
" return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
|
719
|
+
"\n",
|
720
|
+
"# Helper function to print the parse tree in a readable format\n",
|
721
|
+
"def print_parse_tree(derivation, indent=0):\n",
|
722
|
+
" if isinstance(derivation, tuple):\n",
|
723
|
+
" A, derivation1, B, derivation2 = derivation\n",
|
724
|
+
" print(' ' * indent + f\"({A}\")\n",
|
725
|
+
" print_parse_tree(derivation1, indent + 2)\n",
|
726
|
+
" print_parse_tree(derivation2, indent + 2)\n",
|
727
|
+
" print(' ' * indent + f\")\")\n",
|
728
|
+
" else:\n",
|
729
|
+
" print(' ' * indent + derivation)\n",
|
730
|
+
"\n",
|
731
|
+
"# Run the CYK algorithm and get all possible parses\n",
|
732
|
+
"parses = cyk_algorithm(pcfg, sentence)\n",
|
733
|
+
"\n",
|
734
|
+
"# Sort parses by probability in descending order\n",
|
735
|
+
"parses.sort(key=lambda x: x[0], reverse=True)\n",
|
736
|
+
"\n",
|
737
|
+
"# Print the final probabilities and derivations of the sentence\n",
|
738
|
+
"if parses:\n",
|
739
|
+
" for idx, (prob, derivation) in enumerate(parses, start=1):\n",
|
740
|
+
" print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
|
741
|
+
" print_parse_tree(derivation)\n",
|
742
|
+
" print() # Print a blank line between parses\n",
|
743
|
+
"else:\n",
|
744
|
+
" print(\"The sentence could not be parsed with the given grammar.\")\n"
|
745
|
+
]
|
746
|
+
},
|
747
|
+
{
|
748
|
+
"cell_type": "code",
|
749
|
+
"execution_count": 3,
|
750
|
+
"id": "b6de0644-7729-499b-a5f5-e687b20f3e57",
|
751
|
+
"metadata": {},
|
752
|
+
"outputs": [
|
753
|
+
{
|
754
|
+
"name": "stdout",
|
755
|
+
"output_type": "stream",
|
756
|
+
"text": [
|
757
|
+
"Parse t1: Probability = 0.000680\n",
|
758
|
+
"(NP astronomers (VP (V saw stars) (P with ears)))\n",
|
759
|
+
"\n"
|
760
|
+
]
|
761
|
+
}
|
762
|
+
],
|
763
|
+
"source": [
|
764
|
+
"from collections import defaultdict\n",
|
765
|
+
"\n",
|
766
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
767
|
+
"pcfg = {\n",
|
768
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
769
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
770
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
771
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
772
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
773
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
774
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
775
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
776
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
777
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
778
|
+
"}\n",
|
779
|
+
"\n",
|
780
|
+
"# The sentence we want to parse\n",
|
781
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
782
|
+
"\n",
|
783
|
+
"# Function to run probabilistic CYK and list every derivation of the sentence with its own probability\n",
|
784
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
785
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
786
|
+
" \n",
|
787
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
|
788
|
+
" table = defaultdict(list)\n",
|
789
|
+
" \n",
|
790
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
791
|
+
" for i, word in enumerate(sentence):\n",
|
792
|
+
" for rule in pcfg:\n",
|
793
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
794
|
+
" table[(i, i, rule[0])].append((pcfg[rule], rule[1]))\n",
|
795
|
+
" \n",
|
796
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
797
|
+
" for span in range(2, n + 1): # span length\n",
|
798
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
799
|
+
" j = i + span - 1 # end index of the span\n",
|
800
|
+
" for k in range(i, j): # split point\n",
|
801
|
+
" for rule in pcfg:\n",
|
802
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
803
|
+
" A, B, C = rule # A -> B C\n",
|
804
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
805
|
+
" for prob1, derivation1 in table[(i, k, B)]:\n",
|
806
|
+
" for prob2, derivation2 in table[(k + 1, j, C)]:\n",
|
807
|
+
" prob = prob1 * prob2 * pcfg[rule]\n",
|
808
|
+
" table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
|
809
|
+
"\n",
|
810
|
+
" # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
|
811
|
+
" # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
|
812
|
+
" return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
|
813
|
+
"\n",
|
814
|
+
"# Helper function to convert the parse tree into a string\n",
|
815
|
+
"def build_parse_tree(derivation):\n",
|
816
|
+
" if isinstance(derivation, tuple):\n",
|
817
|
+
" A, derivation1, B, derivation2 = derivation\n",
|
818
|
+
" return f\"({A} {build_parse_tree(derivation1)} {build_parse_tree(derivation2)})\"\n",
|
819
|
+
" else:\n",
|
820
|
+
" return derivation\n",
|
821
|
+
"\n",
|
822
|
+
"# Run the CYK algorithm and get all possible parses\n",
|
823
|
+
"parses = cyk_algorithm(pcfg, sentence)\n",
|
824
|
+
"\n",
|
825
|
+
"# Function to sort parses by probability (optional, for better readability)\n",
|
826
|
+
"def sort_parses(parses):\n",
|
827
|
+
" return sorted(parses, key=lambda x: x[0], reverse=True)\n",
|
828
|
+
"\n",
|
829
|
+
"# Sort the parses (optional)\n",
|
830
|
+
"sorted_parses = sort_parses(parses)\n",
|
831
|
+
"\n",
|
832
|
+
"# Print the final probabilities and derivations of the sentence\n",
|
833
|
+
"if sorted_parses:\n",
|
834
|
+
" for idx, (prob, derivation) in enumerate(sorted_parses, start=1):\n",
|
835
|
+
" tree_str = build_parse_tree(derivation)\n",
|
836
|
+
" print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
|
837
|
+
" print(tree_str)\n",
|
838
|
+
" print() # Print a blank line between parses\n",
|
839
|
+
"else:\n",
|
840
|
+
" print(\"The sentence could not be parsed with the given grammar.\")\n"
|
841
|
+
]
|
842
|
+
},
|
843
|
+
{
|
844
|
+
"cell_type": "code",
|
845
|
+
"execution_count": 4,
|
846
|
+
"id": "b8b18f54-377d-4288-8511-a283d619c590",
|
847
|
+
"metadata": {},
|
848
|
+
"outputs": [
|
849
|
+
{
|
850
|
+
"name": "stdout",
|
851
|
+
"output_type": "stream",
|
852
|
+
"text": [
|
853
|
+
"Parse t1: Probability = 0.000680\n",
|
854
|
+
"(NP\n",
|
855
|
+
" astronomers\n",
|
856
|
+
" (VP\n",
|
857
|
+
" (V\n",
|
858
|
+
" saw\n",
|
859
|
+
" stars\n",
|
860
|
+
" )\n",
|
861
|
+
" (P\n",
|
862
|
+
" with\n",
|
863
|
+
" ears\n",
|
864
|
+
" )\n",
|
865
|
+
" )\n",
|
866
|
+
")\n",
|
867
|
+
"\n"
|
868
|
+
]
|
869
|
+
}
|
870
|
+
],
|
871
|
+
"source": [
|
872
|
+
"from collections import defaultdict\n",
|
873
|
+
"\n",
|
874
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
875
|
+
"pcfg = {\n",
|
876
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
877
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
878
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
879
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
880
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
881
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
882
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
883
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
884
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
885
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
886
|
+
"}\n",
|
887
|
+
"\n",
|
888
|
+
"# The sentence we want to parse\n",
|
889
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
890
|
+
"\n",
|
891
|
+
"# Function to run probabilistic CYK and list every derivation of the sentence with its own probability\n",
|
892
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
893
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
894
|
+
" \n",
|
895
|
+
" # Table to store probabilities: (start_index, end_index, non-terminal) -> list of (prob, derivation)\n",
|
896
|
+
" table = defaultdict(list)\n",
|
897
|
+
" \n",
|
898
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
899
|
+
" for i, word in enumerate(sentence):\n",
|
900
|
+
" for rule in pcfg:\n",
|
901
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
902
|
+
" table[(i, i, rule[0])].append((pcfg[rule], word))\n",
|
903
|
+
" \n",
|
904
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
905
|
+
" for span in range(2, n + 1): # span length\n",
|
906
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
907
|
+
" j = i + span - 1 # end index of the span\n",
|
908
|
+
" for k in range(i, j): # split point\n",
|
909
|
+
" for rule in pcfg:\n",
|
910
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
911
|
+
" A, B, C = rule # A -> B C\n",
|
912
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
913
|
+
" for prob1, derivation1 in table[(i, k, B)]:\n",
|
914
|
+
" for prob2, derivation2 in table[(k + 1, j, C)]:\n",
|
915
|
+
" prob = prob1 * prob2 * pcfg[rule]\n",
|
916
|
+
" table[(i, j, A)].append((prob, (B, derivation1, C, derivation2)))\n",
|
917
|
+
"\n",
|
918
|
+
" # Step 3: Return the list of possible derivations for the whole sentence as 'S' (complete sentence)\n",
|
919
|
+
" # The final probability for the entire sentence to be an S should be in table[(0, n-1, 'S')]\n",
|
920
|
+
" return table[(0, n-1, 'S')] # List of all parses (each with a probability and derivation)\n",
|
921
|
+
"\n",
|
922
|
+
"# Helper function to print the parse tree in a readable format\n",
|
923
|
+
"def print_parse_tree(derivation, indent=0):\n",
|
924
|
+
" if isinstance(derivation, tuple):\n",
|
925
|
+
" A, derivation1, B, derivation2 = derivation\n",
|
926
|
+
" print(' ' * indent + f\"({A}\")\n",
|
927
|
+
" print_parse_tree(derivation1, indent + 2)\n",
|
928
|
+
" print_parse_tree(derivation2, indent + 2)\n",
|
929
|
+
" print(' ' * indent + f\")\")\n",
|
930
|
+
" else:\n",
|
931
|
+
" print(' ' * indent + derivation)\n",
|
932
|
+
"\n",
|
933
|
+
"# Run the CYK algorithm and get all possible parses\n",
|
934
|
+
"parses = cyk_algorithm(pcfg, sentence)\n",
|
935
|
+
"\n",
|
936
|
+
"# Sort parses by probability in descending order\n",
|
937
|
+
"parses.sort(key=lambda x: x[0], reverse=True)\n",
|
938
|
+
"\n",
|
939
|
+
"# Print the final probabilities and derivations of the sentence\n",
|
940
|
+
"if parses:\n",
|
941
|
+
" for idx, (prob, derivation) in enumerate(parses, start=1):\n",
|
942
|
+
" print(f\"Parse t{idx}: Probability = {prob:.6f}\")\n",
|
943
|
+
" print_parse_tree(derivation)\n",
|
944
|
+
" print() # Print a blank line between parses\n",
|
945
|
+
"else:\n",
|
946
|
+
" print(\"The sentence could not be parsed with the given grammar.\")"
|
947
|
+
]
|
948
|
+
},
|
949
|
+
{
|
950
|
+
"cell_type": "code",
|
951
|
+
"execution_count": 2,
|
952
|
+
"id": "66fec238-991f-4ded-906e-0f558e8630ea",
|
953
|
+
"metadata": {},
|
954
|
+
"outputs": [
|
955
|
+
{
|
956
|
+
"name": "stdout",
|
957
|
+
"output_type": "stream",
|
958
|
+
"text": [
|
959
|
+
"Final Probability of the sentence: 0.000680\n",
|
960
|
+
"Parse Tree: (S NP (VP (VP V NP) (PP P NP)))\n"
|
961
|
+
]
|
962
|
+
}
|
963
|
+
],
|
964
|
+
"source": [
|
965
|
+
"from collections import defaultdict\n",
|
966
|
+
"\n",
|
967
|
+
"# Probabilistic context-free grammar (PCFG) rules with probabilities\n",
|
968
|
+
"pcfg = {\n",
|
969
|
+
" ('S', 'NP', 'VP'): 1.0, # S -> NP VP\n",
|
970
|
+
" ('VP', 'V', 'NP'): 0.7, # VP -> V NP\n",
|
971
|
+
" ('VP', 'VP', 'PP'): 0.3, # VP -> VP PP\n",
|
972
|
+
" ('PP', 'P', 'NP'): 1.0, # PP -> P NP\n",
|
973
|
+
" ('NP', 'astronomers'): 0.1, # NP -> astronomers\n",
|
974
|
+
" ('NP', 'ears'): 0.18, # NP -> ears\n",
|
975
|
+
" ('NP', 'stars'): 0.18, # NP -> stars\n",
|
976
|
+
" ('NP', 'telescopes'): 0.18, # NP -> telescopes\n",
|
977
|
+
" ('V', 'saw'): 1.0, # V -> saw\n",
|
978
|
+
" ('P', 'with'): 1.0 # P -> with\n",
|
979
|
+
"}\n",
|
980
|
+
"\n",
|
981
|
+
"# The sentence we want to parse\n",
|
982
|
+
"sentence = \"astronomers saw stars with ears\".split()\n",
|
983
|
+
"\n",
|
984
|
+
"# Function to run probabilistic CYK (Viterbi-style): keep the highest-probability derivation per span and record backpointers\n",
|
985
|
+
"def cyk_algorithm(pcfg, sentence):\n",
|
986
|
+
" n = len(sentence) # Length of the sentence (number of words)\n",
|
987
|
+
" \n",
|
988
|
+
" # Table to store probabilities\n",
|
989
|
+
" table = defaultdict(float)\n",
|
990
|
+
" backpointer = defaultdict(lambda: None)\n",
|
991
|
+
"\n",
|
992
|
+
" # Step 1: Initialize the table for single words (length 1 spans)\n",
|
993
|
+
" for i, word in enumerate(sentence):\n",
|
994
|
+
" for rule in pcfg:\n",
|
995
|
+
" if len(rule) == 2 and rule[1] == word: # Match terminal rules like NP -> astronomers\n",
|
996
|
+
" table[(i, i, rule[0])] = pcfg[rule]\n",
|
997
|
+
"\n",
|
998
|
+
" # Step 2: Fill the table for larger spans (length > 1)\n",
|
999
|
+
" for span in range(2, n + 1): # span length\n",
|
1000
|
+
" for i in range(n - span + 1): # start index of the span\n",
|
1001
|
+
" j = i + span - 1 # end index of the span\n",
|
1002
|
+
" for k in range(i, j): # split point\n",
|
1003
|
+
" for rule in pcfg:\n",
|
1004
|
+
" if len(rule) == 3: # binary rule like S -> NP VP\n",
|
1005
|
+
" A, B, C = rule # A -> B C\n",
|
1006
|
+
" if (i, k, B) in table and (k + 1, j, C) in table:\n",
|
1007
|
+
" prob = table[(i, k, B)] * table[(k + 1, j, C)] * pcfg[rule]\n",
|
1008
|
+
" if prob > table[(i, j, A)]:\n",
|
1009
|
+
" table[(i, j, A)] = prob\n",
|
1010
|
+
" backpointer[(i, j, A)] = (B, C, i, k, j)\n",
|
1011
|
+
"\n",
|
1012
|
+
" # Step 3: Return the final result for the whole sentence as an 'S'\n",
|
1013
|
+
" return table[(0, n-1, 'S')], backpointer\n",
|
1014
|
+
"\n",
|
1015
|
+
"# Function to build the parse tree from the backpointer\n",
|
1016
|
+
"def build_parse_tree(backpointer, i, j, A):\n",
|
1017
|
+
" if (i, j, A) not in backpointer or backpointer[(i, j, A)] is None:\n",
|
1018
|
+
" return A # Base case: return the non-terminal if no children\n",
|
1019
|
+
"\n",
|
1020
|
+
" B, C, left_start, split, right_end = backpointer[(i, j, A)]\n",
|
1021
|
+
" left_tree = build_parse_tree(backpointer, left_start, split, B)\n",
|
1022
|
+
" right_tree = build_parse_tree(backpointer, split + 1, right_end, C)\n",
|
1023
|
+
" return f'({A} {left_tree} {right_tree})'\n",
|
1024
|
+
"\n",
|
1025
|
+
"# Run the CYK algorithm and get the final probability and backpointer\n",
|
1026
|
+
"final_prob, backpointer = cyk_algorithm(pcfg, sentence)\n",
|
1027
|
+
"\n",
|
1028
|
+
"# Print the final probability of the sentence\n",
|
1029
|
+
"if final_prob > 0:\n",
|
1030
|
+
" print(f\"Final Probability of the sentence: {final_prob:.6f}\")\n",
|
1031
|
+
" parse_tree = build_parse_tree(backpointer, 0, len(sentence) - 1, 'S')\n",
|
1032
|
+
" print(\"Parse Tree:\", parse_tree)\n",
|
1033
|
+
"else:\n",
|
1034
|
+
" print(\"The sentence could not be parsed with the given grammar.\")"
|
1035
|
+
]
|
1036
|
+
},
|
1037
|
+
{
|
1038
|
+
"cell_type": "code",
|
1039
|
+
"execution_count": null,
|
1040
|
+
"id": "c4a04fbc-be02-4f88-9147-7b2b4497ace3",
|
1041
|
+
"metadata": {},
|
1042
|
+
"outputs": [],
|
1043
|
+
"source": []
|
1044
|
+
}
|
1045
|
+
],
|
1046
|
+
"metadata": {
|
1047
|
+
"kernelspec": {
|
1048
|
+
"display_name": "Python 3 (ipykernel)",
|
1049
|
+
"language": "python",
|
1050
|
+
"name": "python3"
|
1051
|
+
},
|
1052
|
+
"language_info": {
|
1053
|
+
"codemirror_mode": {
|
1054
|
+
"name": "ipython",
|
1055
|
+
"version": 3
|
1056
|
+
},
|
1057
|
+
"file_extension": ".py",
|
1058
|
+
"mimetype": "text/x-python",
|
1059
|
+
"name": "python",
|
1060
|
+
"nbconvert_exporter": "python",
|
1061
|
+
"pygments_lexer": "ipython3",
|
1062
|
+
"version": "3.11.7"
|
1063
|
+
}
|
1064
|
+
},
|
1065
|
+
"nbformat": 4,
|
1066
|
+
"nbformat_minor": 5
|
1067
|
+
}
|