noshot 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/__init__.py +1 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +112 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +111 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +115 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +123 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/2_ANOVA.csv +769 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +126 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Two Way ANOVA.ipynb +138 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/reaction_time.csv +5 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sample_data.csv +16 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sleep_deprivation.csv +4 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/3_Linear.csv +4802 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +113 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression.ipynb +148 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/house_rate.csv +22 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +128 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression.ipynb +145 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/default.csv +1001 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/hours_scores_records.csv +101 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +256 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +157 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +178 -0
- noshot/data/AIDS CN NLP/AIDS/3. Genetic Algorithm/Genetic.ipynb +95 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +74 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +103 -0
- noshot/data/AIDS CN NLP/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +182 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +120 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +125 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/Random Sampling.ipynb +73 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test Hash Function.ipynb +141 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test.ipynb +151 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/1_heart.csv +304 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Independent T Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Paired T Test.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test Hash Function.ipynb +142 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test.ipynb +158 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/iq_test.csv +0 -0
- noshot/data/AIDS CN NLP/AIDS/Others (AllinOne)/All In One.ipynb +4581 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/chat.java +81 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +65 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +44 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +229 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/file_to_send.txt +2 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/filetransfer.java +119 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/rmi.java +56 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.awk +25 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.tcl +81 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.awk +27 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.tcl +153 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +28 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +78 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +79 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +163 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/DV.tcl +111 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/LS.tcl +106 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/analysis.awk +36 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/analysis.awk +20 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/broadcast.tcl +76 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/multicast.tcl +103 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/DHCP.java +125 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/procedure.png +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/1-Prereqs.py +18 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-Chi2test.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-T-test.py +79 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/3-WSD-nb.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/4-Hindle-Rooth.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/5-HMM-Trellis.py +82 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/6-HMM-Viterbi.py +16 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/7-PCFG-parsetree.py +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Chi2test.ipynb +285 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Hindle-Rooth.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/PCFG.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Prereqs.ipynb +131 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/T test.ipynb +252 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/TFIDF BOW.ipynb +171 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Trellis.ipynb +244 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/WSD.ipynb +645 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Word2Vec.ipynb +93 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +370 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +274 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +905 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/test.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +272 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +549 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +817 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +231 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +507 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +255 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +159 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +282 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +670 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +613 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +74 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +480 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +445 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +105 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +87 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +11 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +201 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backward-Procedure.ipynb +597 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Bag_of.ipynb +1422 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/CYK-algorithm.ipynb +1067 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Forward-Procedure.ipynb +477 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/LSTM.ipynb +1290 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-4.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-5.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/abc.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-1-nltk.ipynb +711 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-2-nlp.ipynb +267 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/exp8&9.ipynb +305 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/hind.ipynb +287 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/lab66.ipynb +752 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/leb_3.ipynb +612 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_1.ipynb +3008 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_2.ipynb +3095 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlplab-9.ipynb +295 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nltk-ex-4.ipynb +506 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text1.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text2.txt +8 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text3.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/translation-rnn.ipynb +812 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/word2vector.ipynb +173 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Backward Procedure Algorithm.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Chi Square Collocation.ipynb +208 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Collocation (T test).ipynb +188 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Experiment 1.ipynb +437 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Forward Procedure Algorithm.ipynb +132 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Hindle Rooth.ipynb +414 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/MachineTranslation.ipynb +368 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +86 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +112 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/PCFG Inside Probability.ipynb +451 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Text Generation using LSTM.ipynb +297 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Viterbi.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Word Sense Disambiguation.ipynb +335 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/10.Text Generation using LSTM.ipynb +316 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/11.Machine Translation.ipynb +868 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/2.T and Chi2 Test.ipynb +204 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +234 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/4.Hinddle and Rooth.ipynb +128 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/5.Forward and Backward.ipynb +149 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/6.Viterbi.ipynb +111 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG Parse Tree.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG using cyk.ipynb +101 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/9.Word2Vector.ipynb +78 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/NLP ALL In One.ipynb +2619 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample1.txt +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample2.txt +4 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/word2vec_model.bin +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +312 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/3. Naive Bayes WSD.ipynb +199 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/4. Hinddle and Rooth.ipynb +151 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +164 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/7. PCFG using CYK.ipynb +383 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/8. BOW and TF-IDF.ipynb +252 -0
- noshot/data/AIDS CN NLP/Ubuntu CN Lab.iso +0 -0
- noshot/main.py +47 -0
- noshot-0.1.0.dist-info/LICENSE.txt +21 -0
- noshot-0.1.0.dist-info/METADATA +65 -0
- noshot-0.1.0.dist-info/RECORD +210 -0
- noshot-0.1.0.dist-info/WHEEL +5 -0
- noshot-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,905 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "markdown",
|
5
|
+
"id": "8f151375",
|
6
|
+
"metadata": {},
|
7
|
+
"source": [
|
8
|
+
"# Lab 2\n",
|
9
|
+
"\n",
|
10
|
+
"## Stemming Algorithms"
|
11
|
+
]
|
12
|
+
},
|
13
|
+
{
|
14
|
+
"cell_type": "markdown",
|
15
|
+
"id": "36b906d7",
|
16
|
+
"metadata": {},
|
17
|
+
"source": [
|
18
|
+
"### Porter Stemming Algorithm"
|
19
|
+
]
|
20
|
+
},
|
21
|
+
{
|
22
|
+
"cell_type": "code",
|
23
|
+
"execution_count": 1,
|
24
|
+
"id": "451ac318",
|
25
|
+
"metadata": {},
|
26
|
+
"outputs": [
|
27
|
+
{
|
28
|
+
"data": {
|
29
|
+
"text/plain": [
|
30
|
+
"'write'"
|
31
|
+
]
|
32
|
+
},
|
33
|
+
"execution_count": 1,
|
34
|
+
"metadata": {},
|
35
|
+
"output_type": "execute_result"
|
36
|
+
}
|
37
|
+
],
|
38
|
+
"source": [
|
39
|
+
"import nltk\n",
|
40
|
+
"from nltk.stem import PorterStemmer\n",
|
41
|
+
"word_stemmer = PorterStemmer()\n",
|
42
|
+
"word_stemmer = PorterStemmer()\n",
|
43
|
+
"word_stemmer.stem('writing')"
|
44
|
+
]
|
45
|
+
},
|
46
|
+
{
|
47
|
+
"cell_type": "code",
|
48
|
+
"execution_count": 2,
|
49
|
+
"id": "4dede17e",
|
50
|
+
"metadata": {},
|
51
|
+
"outputs": [
|
52
|
+
{
|
53
|
+
"data": {
|
54
|
+
"text/plain": [
|
55
|
+
"'eat'"
|
56
|
+
]
|
57
|
+
},
|
58
|
+
"execution_count": 2,
|
59
|
+
"metadata": {},
|
60
|
+
"output_type": "execute_result"
|
61
|
+
}
|
62
|
+
],
|
63
|
+
"source": [
|
64
|
+
"word_stemmer.stem('eating')"
|
65
|
+
]
|
66
|
+
},
|
67
|
+
{
|
68
|
+
"cell_type": "markdown",
|
69
|
+
"id": "fdc45a75",
|
70
|
+
"metadata": {},
|
71
|
+
"source": [
|
72
|
+
"### Lancaster Stemming Algorithm"
|
73
|
+
]
|
74
|
+
},
|
75
|
+
{
|
76
|
+
"cell_type": "code",
|
77
|
+
"execution_count": 3,
|
78
|
+
"id": "01c9cd99",
|
79
|
+
"metadata": {},
|
80
|
+
"outputs": [
|
81
|
+
{
|
82
|
+
"data": {
|
83
|
+
"text/plain": [
|
84
|
+
"'eat'"
|
85
|
+
]
|
86
|
+
},
|
87
|
+
"execution_count": 3,
|
88
|
+
"metadata": {},
|
89
|
+
"output_type": "execute_result"
|
90
|
+
}
|
91
|
+
],
|
92
|
+
"source": [
|
93
|
+
"import nltk\n",
|
94
|
+
"from nltk.stem import LancasterStemmer\n",
|
95
|
+
"Lanc_stemmer = LancasterStemmer()\n",
|
96
|
+
"Lanc_stemmer = LancasterStemmer()\n",
|
97
|
+
"Lanc_stemmer.stem('eats')"
|
98
|
+
]
|
99
|
+
},
|
100
|
+
{
|
101
|
+
"cell_type": "code",
|
102
|
+
"execution_count": 4,
|
103
|
+
"id": "7d22b098",
|
104
|
+
"metadata": {},
|
105
|
+
"outputs": [
|
106
|
+
{
|
107
|
+
"data": {
|
108
|
+
"text/plain": [
|
109
|
+
"'run'"
|
110
|
+
]
|
111
|
+
},
|
112
|
+
"execution_count": 4,
|
113
|
+
"metadata": {},
|
114
|
+
"output_type": "execute_result"
|
115
|
+
}
|
116
|
+
],
|
117
|
+
"source": [
|
118
|
+
"Lanc_stemmer.stem('runs')"
|
119
|
+
]
|
120
|
+
},
|
121
|
+
{
|
122
|
+
"cell_type": "markdown",
|
123
|
+
"id": "f7ee7a63",
|
124
|
+
"metadata": {},
|
125
|
+
"source": [
|
126
|
+
"### Regular Expression Stemming Algorithm"
|
127
|
+
]
|
128
|
+
},
|
129
|
+
{
|
130
|
+
"cell_type": "code",
|
131
|
+
"execution_count": 5,
|
132
|
+
"id": "e19ac64d",
|
133
|
+
"metadata": {},
|
134
|
+
"outputs": [
|
135
|
+
{
|
136
|
+
"data": {
|
137
|
+
"text/plain": [
|
138
|
+
"'eat'"
|
139
|
+
]
|
140
|
+
},
|
141
|
+
"execution_count": 5,
|
142
|
+
"metadata": {},
|
143
|
+
"output_type": "execute_result"
|
144
|
+
}
|
145
|
+
],
|
146
|
+
"source": [
|
147
|
+
"import nltk\n",
|
148
|
+
"from nltk.stem import RegexpStemmer\n",
|
149
|
+
"Reg_stemmer = RegexpStemmer('ing')\n",
|
150
|
+
"Reg_stemmer.stem('eating')"
|
151
|
+
]
|
152
|
+
},
|
153
|
+
{
|
154
|
+
"cell_type": "code",
|
155
|
+
"execution_count": 16,
|
156
|
+
"id": "5f00fe2b",
|
157
|
+
"metadata": {},
|
158
|
+
"outputs": [
|
159
|
+
{
|
160
|
+
"data": {
|
161
|
+
"text/plain": [
|
162
|
+
"'eat'"
|
163
|
+
]
|
164
|
+
},
|
165
|
+
"execution_count": 16,
|
166
|
+
"metadata": {},
|
167
|
+
"output_type": "execute_result"
|
168
|
+
}
|
169
|
+
],
|
170
|
+
"source": [
|
171
|
+
"import nltk\n",
|
172
|
+
"from nltk.stem import RegexpStemmer\n",
|
173
|
+
"Reg_stemmer = RegexpStemmer('ing')\n",
|
174
|
+
"Reg_stemmer.stem('ingeat')"
|
175
|
+
]
|
176
|
+
},
|
177
|
+
{
|
178
|
+
"cell_type": "markdown",
|
179
|
+
"id": "8cb6b188",
|
180
|
+
"metadata": {},
|
181
|
+
"source": [
|
182
|
+
"### Snowball Stemming Algorithm"
|
183
|
+
]
|
184
|
+
},
|
185
|
+
{
|
186
|
+
"cell_type": "code",
|
187
|
+
"execution_count": 9,
|
188
|
+
"id": "03fa74c5",
|
189
|
+
"metadata": {},
|
190
|
+
"outputs": [
|
191
|
+
{
|
192
|
+
"data": {
|
193
|
+
"text/plain": [
|
194
|
+
"('arabic',\n",
|
195
|
+
" 'danish',\n",
|
196
|
+
" 'dutch',\n",
|
197
|
+
" 'english',\n",
|
198
|
+
" 'finnish',\n",
|
199
|
+
" 'french',\n",
|
200
|
+
" 'german',\n",
|
201
|
+
" 'hungarian',\n",
|
202
|
+
" 'italian',\n",
|
203
|
+
" 'norwegian',\n",
|
204
|
+
" 'porter',\n",
|
205
|
+
" 'portuguese',\n",
|
206
|
+
" 'romanian',\n",
|
207
|
+
" 'russian',\n",
|
208
|
+
" 'spanish',\n",
|
209
|
+
" 'swedish')"
|
210
|
+
]
|
211
|
+
},
|
212
|
+
"execution_count": 9,
|
213
|
+
"metadata": {},
|
214
|
+
"output_type": "execute_result"
|
215
|
+
}
|
216
|
+
],
|
217
|
+
"source": [
|
218
|
+
"import nltk\n",
|
219
|
+
"from nltk.stem import SnowballStemmer\n",
|
220
|
+
"SnowballStemmer.languages"
|
221
|
+
]
|
222
|
+
},
|
223
|
+
{
|
224
|
+
"cell_type": "code",
|
225
|
+
"execution_count": 13,
|
226
|
+
"id": "16ec90c8",
|
227
|
+
"metadata": {},
|
228
|
+
"outputs": [
|
229
|
+
{
|
230
|
+
"data": {
|
231
|
+
"text/plain": [
|
232
|
+
"'bonjour'"
|
233
|
+
]
|
234
|
+
},
|
235
|
+
"execution_count": 13,
|
236
|
+
"metadata": {},
|
237
|
+
"output_type": "execute_result"
|
238
|
+
}
|
239
|
+
],
|
240
|
+
"source": [
|
241
|
+
"import nltk\n",
|
242
|
+
"from nltk.stem import SnowballStemmer\n",
|
243
|
+
"French_stemmer = SnowballStemmer('french')\n",
|
244
|
+
"French_stemmer.stem ('Bonjoura')"
|
245
|
+
]
|
246
|
+
},
|
247
|
+
{
|
248
|
+
"cell_type": "markdown",
|
249
|
+
"id": "985a7537",
|
250
|
+
"metadata": {},
|
251
|
+
"source": [
|
252
|
+
"## Lemmatization"
|
253
|
+
]
|
254
|
+
},
|
255
|
+
{
|
256
|
+
"cell_type": "code",
|
257
|
+
"execution_count": 14,
|
258
|
+
"id": "49e742a7",
|
259
|
+
"metadata": {
|
260
|
+
"scrolled": true
|
261
|
+
},
|
262
|
+
"outputs": [
|
263
|
+
{
|
264
|
+
"data": {
|
265
|
+
"text/plain": [
|
266
|
+
"'eating'"
|
267
|
+
]
|
268
|
+
},
|
269
|
+
"execution_count": 14,
|
270
|
+
"metadata": {},
|
271
|
+
"output_type": "execute_result"
|
272
|
+
}
|
273
|
+
],
|
274
|
+
"source": [
|
275
|
+
"import nltk\n",
|
276
|
+
"from nltk.stem import WordNetLemmatizer\n",
|
277
|
+
"lemmatizer = WordNetLemmatizer()\n",
|
278
|
+
"lemmatizer.lemmatize('eating')"
|
279
|
+
]
|
280
|
+
},
|
281
|
+
{
|
282
|
+
"cell_type": "code",
|
283
|
+
"execution_count": 17,
|
284
|
+
"id": "991ca92f",
|
285
|
+
"metadata": {},
|
286
|
+
"outputs": [
|
287
|
+
{
|
288
|
+
"data": {
|
289
|
+
"text/plain": [
|
290
|
+
"'book'"
|
291
|
+
]
|
292
|
+
},
|
293
|
+
"execution_count": 17,
|
294
|
+
"metadata": {},
|
295
|
+
"output_type": "execute_result"
|
296
|
+
}
|
297
|
+
],
|
298
|
+
"source": [
|
299
|
+
"import nltk\n",
|
300
|
+
"from nltk.stem import WordNetLemmatizer\n",
|
301
|
+
"lemmatizer = WordNetLemmatizer()\n",
|
302
|
+
"lemmatizer.lemmatize('books')"
|
303
|
+
]
|
304
|
+
},
|
305
|
+
{
|
306
|
+
"cell_type": "markdown",
|
307
|
+
"id": "4a80fc99",
|
308
|
+
"metadata": {},
|
309
|
+
"source": [
|
310
|
+
"## Difference between Stemming & Lemmatization"
|
311
|
+
]
|
312
|
+
},
|
313
|
+
{
|
314
|
+
"cell_type": "code",
|
315
|
+
"execution_count": 18,
|
316
|
+
"id": "c19cb44e",
|
317
|
+
"metadata": {},
|
318
|
+
"outputs": [
|
319
|
+
{
|
320
|
+
"data": {
|
321
|
+
"text/plain": [
|
322
|
+
"'believ'"
|
323
|
+
]
|
324
|
+
},
|
325
|
+
"execution_count": 18,
|
326
|
+
"metadata": {},
|
327
|
+
"output_type": "execute_result"
|
328
|
+
}
|
329
|
+
],
|
330
|
+
"source": [
|
331
|
+
"import nltk\n",
|
332
|
+
"from nltk.stem import PorterStemmer\n",
|
333
|
+
"word_stemmer = PorterStemmer()\n",
|
334
|
+
"word_stemmer.stem('believes')"
|
335
|
+
]
|
336
|
+
},
|
337
|
+
{
|
338
|
+
"cell_type": "code",
|
339
|
+
"execution_count": 20,
|
340
|
+
"id": "cc71bced",
|
341
|
+
"metadata": {},
|
342
|
+
"outputs": [
|
343
|
+
{
|
344
|
+
"data": {
|
345
|
+
"text/plain": [
|
346
|
+
"'belief'"
|
347
|
+
]
|
348
|
+
},
|
349
|
+
"execution_count": 20,
|
350
|
+
"metadata": {},
|
351
|
+
"output_type": "execute_result"
|
352
|
+
}
|
353
|
+
],
|
354
|
+
"source": [
|
355
|
+
"import nltk\n",
|
356
|
+
"from nltk.stem import WordNetLemmatizer\n",
|
357
|
+
"lemmatizer = WordNetLemmatizer()\n",
|
358
|
+
"lemmatizer.lemmatize('believes')"
|
359
|
+
]
|
360
|
+
},
|
361
|
+
{
|
362
|
+
"cell_type": "markdown",
|
363
|
+
"id": "a2c74540",
|
364
|
+
"metadata": {},
|
365
|
+
"source": [
|
366
|
+
"## Ex. 0\n",
|
367
|
+
"\n",
|
368
|
+
"**To Do** (take 100 words) [Write in observation]\n",
|
369
|
+
"1. Tokenize the text and display the total number of unique words.\n",
|
370
|
+
"2. Frequency of each word without duplicate entries.\n",
|
371
|
+
"3. Joint probability of each word."
|
372
|
+
]
|
373
|
+
},
|
374
|
+
{
|
375
|
+
"cell_type": "code",
|
376
|
+
"execution_count": 41,
|
377
|
+
"id": "f80aaa04",
|
378
|
+
"metadata": {},
|
379
|
+
"outputs": [
|
380
|
+
{
|
381
|
+
"name": "stdout",
|
382
|
+
"output_type": "stream",
|
383
|
+
"text": [
|
384
|
+
"Tokenized words\n"
|
385
|
+
]
|
386
|
+
},
|
387
|
+
{
|
388
|
+
"data": {
|
389
|
+
"text/plain": [
|
390
|
+
"['Lorem',\n",
|
391
|
+
" 'ipsum',\n",
|
392
|
+
" 'dolor',\n",
|
393
|
+
" 'sit',\n",
|
394
|
+
" 'amet',\n",
|
395
|
+
" 'consectetur',\n",
|
396
|
+
" 'adipiscing',\n",
|
397
|
+
" 'elit',\n",
|
398
|
+
" 'Aliquam',\n",
|
399
|
+
" 'tincidunt',\n",
|
400
|
+
" 'dapibus',\n",
|
401
|
+
" 'sapien',\n",
|
402
|
+
" 'id',\n",
|
403
|
+
" 'tincidunt',\n",
|
404
|
+
" 'Cras',\n",
|
405
|
+
" 'sit',\n",
|
406
|
+
" 'amet',\n",
|
407
|
+
" 'lectus',\n",
|
408
|
+
" 'magna',\n",
|
409
|
+
" 'Praesent',\n",
|
410
|
+
" 'efficitur',\n",
|
411
|
+
" 'ligula',\n",
|
412
|
+
" 'semper',\n",
|
413
|
+
" 'aliquam',\n",
|
414
|
+
" 'tellus',\n",
|
415
|
+
" 'nec',\n",
|
416
|
+
" 'iaculis',\n",
|
417
|
+
" 'lectus',\n",
|
418
|
+
" 'Vestibulum',\n",
|
419
|
+
" 'tincidunt',\n",
|
420
|
+
" 'risus',\n",
|
421
|
+
" 'quis',\n",
|
422
|
+
" 'ligula',\n",
|
423
|
+
" 'congue',\n",
|
424
|
+
" 'nec',\n",
|
425
|
+
" 'accumsan',\n",
|
426
|
+
" 'ipsum',\n",
|
427
|
+
" 'aliquet',\n",
|
428
|
+
" 'Ut',\n",
|
429
|
+
" 'pharetra',\n",
|
430
|
+
" 'ex',\n",
|
431
|
+
" 'non',\n",
|
432
|
+
" 'gravida',\n",
|
433
|
+
" 'mattis',\n",
|
434
|
+
" 'lorem',\n",
|
435
|
+
" 'libero',\n",
|
436
|
+
" 'egestas',\n",
|
437
|
+
" 'sapien',\n",
|
438
|
+
" 'vel',\n",
|
439
|
+
" 'placerat',\n",
|
440
|
+
" 'magna',\n",
|
441
|
+
" 'turpis',\n",
|
442
|
+
" 'a',\n",
|
443
|
+
" 'enim',\n",
|
444
|
+
" 'Nullam',\n",
|
445
|
+
" 'porttitor',\n",
|
446
|
+
" 'leo',\n",
|
447
|
+
" 'ac',\n",
|
448
|
+
" 'risus',\n",
|
449
|
+
" 'sagittis',\n",
|
450
|
+
" 'a',\n",
|
451
|
+
" 'gravida',\n",
|
452
|
+
" 'ligula',\n",
|
453
|
+
" 'dapibus',\n",
|
454
|
+
" 'Donec',\n",
|
455
|
+
" 'eu',\n",
|
456
|
+
" 'tempus',\n",
|
457
|
+
" 'arcu',\n",
|
458
|
+
" 'Vivamus',\n",
|
459
|
+
" 'ac',\n",
|
460
|
+
" 'pellentesque',\n",
|
461
|
+
" 'tellus',\n",
|
462
|
+
" '.',\n",
|
463
|
+
" 'Etiam',\n",
|
464
|
+
" 'felis',\n",
|
465
|
+
" 'dui',\n",
|
466
|
+
" 'sodales',\n",
|
467
|
+
" 'eu',\n",
|
468
|
+
" 'risus',\n",
|
469
|
+
" 'ut',\n",
|
470
|
+
" 'congue',\n",
|
471
|
+
" 'pulvinar',\n",
|
472
|
+
" 'nisi',\n",
|
473
|
+
" 'Vestibulum',\n",
|
474
|
+
" 'vitae',\n",
|
475
|
+
" 'massa',\n",
|
476
|
+
" 'sed',\n",
|
477
|
+
" 'mauris',\n",
|
478
|
+
" 'iaculis',\n",
|
479
|
+
" 'tincidunt',\n",
|
480
|
+
" 'vitae',\n",
|
481
|
+
" 'ut',\n",
|
482
|
+
" 'ipsum',\n",
|
483
|
+
" 'Sed',\n",
|
484
|
+
" 'varius',\n",
|
485
|
+
" 'vitae',\n",
|
486
|
+
" 'turpis',\n",
|
487
|
+
" 'nec',\n",
|
488
|
+
" 'auctor',\n",
|
489
|
+
" 'Ut',\n",
|
490
|
+
" 'pulvinar']"
|
491
|
+
]
|
492
|
+
},
|
493
|
+
"execution_count": 41,
|
494
|
+
"metadata": {},
|
495
|
+
"output_type": "execute_result"
|
496
|
+
}
|
497
|
+
],
|
498
|
+
"source": [
|
499
|
+
"import nltk\n",
|
500
|
+
"from nltk.tokenize import word_tokenize\n",
|
501
|
+
"text = '''Lorem ipsum dolor sit amet consectetur adipiscing elit Aliquam tincidunt dapibus sapien id tincidunt Cras sit amet lectus magna Praesent efficitur ligula semper aliquam tellus nec iaculis lectus Vestibulum tincidunt risus quis ligula congue nec accumsan ipsum aliquet Ut pharetra ex non gravida mattis lorem libero egestas sapien vel placerat magna turpis a enim Nullam porttitor leo ac risus sagittis a gravida ligula dapibus Donec eu tempus arcu Vivamus ac pellentesque tellus. Etiam felis dui sodales eu risus ut congue pulvinar nisi Vestibulum vitae massa sed mauris iaculis tincidunt vitae ut ipsum Sed varius vitae turpis nec auctor Ut pulvinar'''\n",
|
502
|
+
"\n",
|
503
|
+
"print('Tokenized words')\n",
|
504
|
+
"word_tokenize(text)"
|
505
|
+
]
|
506
|
+
},
|
507
|
+
{
|
508
|
+
"cell_type": "code",
|
509
|
+
"execution_count": 40,
|
510
|
+
"id": "1d074d87",
|
511
|
+
"metadata": {},
|
512
|
+
"outputs": [
|
513
|
+
{
|
514
|
+
"name": "stdout",
|
515
|
+
"output_type": "stream",
|
516
|
+
"text": [
|
517
|
+
"{'Lorem': 1, 'ipsum': 3, 'dolor': 1, 'sit': 2, 'amet': 2, 'consectetur': 1, 'adipiscing': 1, 'elit': 1, 'Aliquam': 1, 'tincidunt': 4, 'dapibus': 2, 'sapien': 2, 'id': 1, 'Cras': 1, 'lectus': 2, 'magna': 2, 'Praesent': 1, 'efficitur': 1, 'ligula': 3, 'semper': 1, 'aliquam': 1, 'tellus': 2, 'nec': 3, 'iaculis': 2, 'Vestibulum': 2, 'risus': 3, 'quis': 1, 'congue': 2, 'accumsan': 1, 'aliquet': 1, 'Ut': 2, 'pharetra': 1, 'ex': 1, 'non': 1, 'gravida': 2, 'mattis': 1, 'lorem': 1, 'libero': 1, 'egestas': 1, 'vel': 1, 'placerat': 1, 'turpis': 2, 'a': 2, 'enim': 1, 'Nullam': 1, 'porttitor': 1, 'leo': 1, 'ac': 2, 'sagittis': 1, 'Donec': 1, 'eu': 2, 'tempus': 1, 'arcu': 1, 'Vivamus': 1, 'pellentesque': 1, '.': 1, 'Etiam': 1, 'felis': 1, 'dui': 1, 'sodales': 1, 'ut': 2, 'pulvinar': 2, 'nisi': 1, 'vitae': 3, 'massa': 1, 'sed': 1, 'mauris': 1, 'Sed': 1, 'varius': 1, 'auctor': 1}\n"
|
518
|
+
]
|
519
|
+
}
|
520
|
+
],
|
521
|
+
"source": [
|
522
|
+
"words = nltk.tokenize.word_tokenize(text)\n",
|
523
|
+
"# fdist1 = nltk.FreqDist(words)\n",
|
524
|
+
"\n",
|
525
|
+
"# unique_word_freq = dict((word, freq) for word, freq in fdist1.items() if not word.isdigit())\n",
|
526
|
+
"\n",
|
527
|
+
"# print(unique_word_freq)\n",
|
528
|
+
"\n"
|
529
|
+
]
|
530
|
+
},
|
531
|
+
{
|
532
|
+
"cell_type": "code",
|
533
|
+
"execution_count": 48,
|
534
|
+
"id": "f48e6c6e",
|
535
|
+
"metadata": {},
|
536
|
+
"outputs": [
|
537
|
+
{
|
538
|
+
"name": "stdout",
|
539
|
+
"output_type": "stream",
|
540
|
+
"text": [
|
541
|
+
"Frequency of the word is:\n",
|
542
|
+
"{'Air': 1, 'pollution': 1, 'is': 2, 'a': 2, 'major': 1, 'environmental': 1, 'problem': 1, 'in': 1, 'many': 1, 'parts': 1, 'of': 3, 'the': 3, 'world.': 1, 'It': 1, 'caused': 1, 'by': 1, 'release': 1, 'harmful': 1, 'pollutants': 2, 'into': 1, 'atmosphere.': 1, 'These': 1, 'can': 2, 'come': 2, 'from': 2, 'variety': 1, 'sources,': 2, 'including': 1, 'factories,': 1, 'power': 1, 'plants,': 1, 'cars,': 1, 'and': 2, 'airplanes.': 1, 'They': 1, 'also': 1, 'natural': 1, 'such': 1, 'as': 1, 'volcanoes': 1, 'forest': 1, 'fires.': 1}\n",
|
543
|
+
"Minimum freq for word: 1\n",
|
544
|
+
"Maximum freq for word: 3\n",
|
545
|
+
"Total no of words: 54\n",
|
546
|
+
"Total no of unique words: 42\n"
|
547
|
+
]
|
548
|
+
}
|
549
|
+
],
|
550
|
+
"source": [
|
551
|
+
"mystring=\"Air pollution is a major environmental problem in many parts of the world. It is caused by the release of harmful pollutants into the atmosphere. These pollutants can come from a variety of sources, including factories, power plants, cars, and airplanes. They can also come from natural sources, such as volcanoes and forest fires.\"\n",
|
552
|
+
"# Split on whitespace only, so punctuation stays attached to a token (e.g. 'world.').\n",
|
553
|
+
"mylist=mystring.split()\n",
|
554
|
+
"freq=[mylist.count(p) for p in mylist]  # per-token occurrence count, aligned with mylist\n",
|
555
|
+
"mini=min(freq)\n",
|
556
|
+
"maxi=max(freq)\n",
|
557
|
+
"total=len(mylist)  # number of tokens; summing freq would give the sum of squared counts\n",
|
558
|
+
"print(\"Frequency of the word is:\")\n",
|
559
|
+
"print(dict(zip(mylist,freq)))\n",
|
560
|
+
"print(\"Minimum freq for word:\",mini)\n",
|
561
|
+
"print(\"Maximum freq for word:\",maxi)\n",
|
562
|
+
"print(\"Total no of words:\",total)\n",
|
563
|
+
|
+
"print(\"Total no of unique words:\",len(set(mylist)))\n"
|
564
|
+
]
|
565
|
+
},
|
566
|
+
{
|
567
|
+
"cell_type": "code",
|
568
|
+
"execution_count": 50,
|
569
|
+
"id": "478c7d3d",
|
570
|
+
"metadata": {},
|
571
|
+
"outputs": [
|
572
|
+
{
|
573
|
+
"name": "stdout",
|
574
|
+
"output_type": "stream",
|
575
|
+
"text": [
|
576
|
+
"(s (dp (d the) (np dog)) (vp (v chased) (dp (d the) (np cat))))\n"
|
577
|
+
]
|
578
|
+
}
|
579
|
+
],
|
580
|
+
"source": [
|
581
|
+
"from nltk.tree import Tree\n",
|
582
|
+
"dp1 = Tree('dp', [Tree('d', ['the']), Tree('np', ['dog'])])\n",
|
583
|
+
"dp2 = Tree('dp', [Tree('d', ['the']), Tree('np', ['cat'])])\n",
|
584
|
+
"vp = Tree('vp', [Tree('v', ['chased']), dp2])\n",
|
585
|
+
"tree = Tree('s', [dp1, vp])\n",
|
586
|
+
"print(tree)"
|
587
|
+
]
|
588
|
+
},
|
589
|
+
{
|
590
|
+
"cell_type": "code",
|
591
|
+
"execution_count": 51,
|
592
|
+
"id": "cb31d074",
|
593
|
+
"metadata": {},
|
594
|
+
"outputs": [
|
595
|
+
{
|
596
|
+
"data": {
|
597
|
+
"text/plain": [
|
598
|
+
"('dp', 'dp', 'vp', 's')"
|
599
|
+
]
|
600
|
+
},
|
601
|
+
"execution_count": 51,
|
602
|
+
"metadata": {},
|
603
|
+
"output_type": "execute_result"
|
604
|
+
}
|
605
|
+
],
|
606
|
+
"source": [
|
607
|
+
"dp1.label(), dp2.label(), vp.label(), tree.label()"
|
608
|
+
]
|
609
|
+
},
|
610
|
+
{
|
611
|
+
"cell_type": "code",
|
612
|
+
"execution_count": 52,
|
613
|
+
"id": "f18e510a",
|
614
|
+
"metadata": {},
|
615
|
+
"outputs": [
|
616
|
+
{
|
617
|
+
"name": "stdout",
|
618
|
+
"output_type": "stream",
|
619
|
+
"text": [
|
620
|
+
"cat\n"
|
621
|
+
]
|
622
|
+
}
|
623
|
+
],
|
624
|
+
"source": [
|
625
|
+
"print(tree[1,1,1,0])"
|
626
|
+
]
|
627
|
+
},
|
628
|
+
{
|
629
|
+
"cell_type": "code",
|
630
|
+
"execution_count": 53,
|
631
|
+
"id": "e966248c",
|
632
|
+
"metadata": {},
|
633
|
+
"outputs": [
|
634
|
+
{
|
635
|
+
"name": "stdout",
|
636
|
+
"output_type": "stream",
|
637
|
+
"text": [
|
638
|
+
"[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0), (1, 1, 0, 0), (1, 1, 1), (1, 1, 1, 0)]\n"
|
639
|
+
]
|
640
|
+
}
|
641
|
+
],
|
642
|
+
"source": [
|
643
|
+
"print(tree.treepositions())"
|
644
|
+
]
|
645
|
+
},
|
646
|
+
{
|
647
|
+
"cell_type": "code",
|
648
|
+
"execution_count": 54,
|
649
|
+
"id": "a3d9848d",
|
650
|
+
"metadata": {},
|
651
|
+
"outputs": [
|
652
|
+
{
|
653
|
+
"name": "stdout",
|
654
|
+
"output_type": "stream",
|
655
|
+
"text": [
|
656
|
+
"\\Tree [.s\n",
|
657
|
+
" [.dp [.d the ] [.np dog ] ]\n",
|
658
|
+
" [.vp [.v chased ] [.dp [.d the ] [.np cat ] ] ] ]\n"
|
659
|
+
]
|
660
|
+
}
|
661
|
+
],
|
662
|
+
"source": [
|
663
|
+
"print(tree.pformat_latex_qtree())"
|
664
|
+
]
|
665
|
+
},
|
666
|
+
{
|
667
|
+
"cell_type": "code",
|
668
|
+
"execution_count": 55,
|
669
|
+
"id": "5175df28",
|
670
|
+
"metadata": {},
|
671
|
+
"outputs": [
|
672
|
+
{
|
673
|
+
"name": "stdout",
|
674
|
+
"output_type": "stream",
|
675
|
+
"text": [
|
676
|
+
" s \n",
|
677
|
+
" ________|_____ \n",
|
678
|
+
" | vp \n",
|
679
|
+
" | _____|___ \n",
|
680
|
+
" dp | dp \n",
|
681
|
+
" ___|___ | ___|___ \n",
|
682
|
+
" d np v d np\n",
|
683
|
+
" | | | | | \n",
|
684
|
+
"the dog chased the cat\n",
|
685
|
+
"\n"
|
686
|
+
]
|
687
|
+
}
|
688
|
+
],
|
689
|
+
"source": [
|
690
|
+
"tree.pretty_print()"
|
691
|
+
]
|
692
|
+
},
|
693
|
+
{
|
694
|
+
"cell_type": "code",
|
695
|
+
"execution_count": 56,
|
696
|
+
"id": "1e361781",
|
697
|
+
"metadata": {},
|
698
|
+
"outputs": [
|
699
|
+
{
|
700
|
+
"name": "stdout",
|
701
|
+
"output_type": "stream",
|
702
|
+
"text": [
|
703
|
+
" s \n",
|
704
|
+
" ┌──────────────┴────────┐ \n",
|
705
|
+
" │ vp \n",
|
706
|
+
" │ ┌────────┴──────┐ \n",
|
707
|
+
" dp │ dp \n",
|
708
|
+
" ┌──────┴──────┐ │ ┌──────┴──────┐ \n",
|
709
|
+
" d np v d np\n",
|
710
|
+
" │ │ │ │ │ \n",
|
711
|
+
"the dog chased the cat\n",
|
712
|
+
"\n"
|
713
|
+
]
|
714
|
+
}
|
715
|
+
],
|
716
|
+
"source": [
|
717
|
+
"tree.pretty_print(unicodelines=True, nodedist=4)"
|
718
|
+
]
|
719
|
+
},
|
720
|
+
{
|
721
|
+
"cell_type": "code",
|
722
|
+
"execution_count": 57,
|
723
|
+
"id": "6d67880b",
|
724
|
+
"metadata": {},
|
725
|
+
"outputs": [
|
726
|
+
{
|
727
|
+
"name": "stdout",
|
728
|
+
"output_type": "stream",
|
729
|
+
"text": [
|
730
|
+
"(S (NP I) (VP (V enjoyed) (NP my cookie)))\n"
|
731
|
+
]
|
732
|
+
}
|
733
|
+
],
|
734
|
+
"source": [
|
735
|
+
"tree2 = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')\n",
|
736
|
+
"print(tree2)"
|
737
|
+
]
|
738
|
+
},
|
739
|
+
{
|
740
|
+
"cell_type": "code",
|
741
|
+
"execution_count": 59,
|
742
|
+
"id": "b1786e29",
|
743
|
+
"metadata": {},
|
744
|
+
"outputs": [
|
745
|
+
{
|
746
|
+
"data": {
|
747
|
+
"text/plain": [
|
748
|
+
"True"
|
749
|
+
]
|
750
|
+
},
|
751
|
+
"execution_count": 59,
|
752
|
+
"metadata": {},
|
753
|
+
"output_type": "execute_result"
|
754
|
+
}
|
755
|
+
],
|
756
|
+
"source": [
|
757
|
+
"tree == Tree.fromstring(str(tree))"
|
758
|
+
]
|
759
|
+
},
|
760
|
+
{
|
761
|
+
"cell_type": "code",
|
762
|
+
"execution_count": 60,
|
763
|
+
"id": "e315f11b",
|
764
|
+
"metadata": {},
|
765
|
+
"outputs": [
|
766
|
+
{
|
767
|
+
"data": {
|
768
|
+
"text/plain": [
|
769
|
+
"True"
|
770
|
+
]
|
771
|
+
},
|
772
|
+
"execution_count": 60,
|
773
|
+
"metadata": {},
|
774
|
+
"output_type": "execute_result"
|
775
|
+
}
|
776
|
+
],
|
777
|
+
"source": [
|
778
|
+
"tree2 == Tree.fromstring(str(tree2))"
|
779
|
+
]
|
780
|
+
},
|
781
|
+
{
|
782
|
+
"cell_type": "code",
|
783
|
+
"execution_count": 61,
|
784
|
+
"id": "9d563eb0",
|
785
|
+
"metadata": {},
|
786
|
+
"outputs": [
|
787
|
+
{
|
788
|
+
"data": {
|
789
|
+
"text/plain": [
|
790
|
+
"False"
|
791
|
+
]
|
792
|
+
},
|
793
|
+
"execution_count": 61,
|
794
|
+
"metadata": {},
|
795
|
+
"output_type": "execute_result"
|
796
|
+
}
|
797
|
+
],
|
798
|
+
"source": [
|
799
|
+
"tree == tree2"
|
800
|
+
]
|
801
|
+
},
|
802
|
+
{
|
803
|
+
"cell_type": "code",
|
804
|
+
"execution_count": 62,
|
805
|
+
"id": "8ca59597",
|
806
|
+
"metadata": {},
|
807
|
+
"outputs": [
|
808
|
+
{
|
809
|
+
"data": {
|
810
|
+
"text/plain": [
|
811
|
+
"False"
|
812
|
+
]
|
813
|
+
},
|
814
|
+
"execution_count": 62,
|
815
|
+
"metadata": {},
|
816
|
+
"output_type": "execute_result"
|
817
|
+
}
|
818
|
+
],
|
819
|
+
"source": [
|
820
|
+
"tree == Tree.fromstring(str(tree2))"
|
821
|
+
]
|
822
|
+
},
|
823
|
+
{
|
824
|
+
"cell_type": "code",
|
825
|
+
"execution_count": 63,
|
826
|
+
"id": "cdb17ebd",
|
827
|
+
"metadata": {},
|
828
|
+
"outputs": [
|
829
|
+
{
|
830
|
+
"data": {
|
831
|
+
"text/plain": [
|
832
|
+
"False"
|
833
|
+
]
|
834
|
+
},
|
835
|
+
"execution_count": 63,
|
836
|
+
"metadata": {},
|
837
|
+
"output_type": "execute_result"
|
838
|
+
}
|
839
|
+
],
|
840
|
+
"source": [
|
841
|
+
"tree2 == Tree.fromstring(str(tree))"
|
842
|
+
]
|
843
|
+
},
|
844
|
+
{
|
845
|
+
"cell_type": "code",
|
846
|
+
"execution_count": 64,
|
847
|
+
"id": "ce361e1c",
|
848
|
+
"metadata": {},
|
849
|
+
"outputs": [
|
850
|
+
{
|
851
|
+
"name": "stdout",
|
852
|
+
"output_type": "stream",
|
853
|
+
"text": [
|
854
|
+
"(S (NP I) (VP (V enjoyed) (NP my cookie)))\n"
|
855
|
+
]
|
856
|
+
}
|
857
|
+
],
|
858
|
+
"source": [
|
859
|
+
"tree = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')\n",
|
860
|
+
"print(tree)"
|
861
|
+
]
|
862
|
+
},
|
863
|
+
{
|
864
|
+
"cell_type": "code",
|
865
|
+
"execution_count": 65,
|
866
|
+
"id": "42749bc6",
|
867
|
+
"metadata": {},
|
868
|
+
"outputs": [
|
869
|
+
{
|
870
|
+
"name": "stdout",
|
871
|
+
"output_type": "stream",
|
872
|
+
"text": [
|
873
|
+
"(S (NP I) (VP (V enjoyed) (NP my cookie)))\n",
|
874
|
+
"(S (NP I) (VP (V enjoyed) (NP my cookie)))\n"
|
875
|
+
]
|
876
|
+
}
|
877
|
+
],
|
878
|
+
"source": [
|
879
|
+
"print(Tree.fromstring('[S [NP I] [VP [V enjoyed] [NP my cookie]]]',brackets='[]'))\n",
|
880
|
+
"print(Tree.fromstring('<S <NP I> <VP <V enjoyed> <NP my cookie>>>',brackets='<>'))"
|
881
|
+
]
|
882
|
+
}
|
883
|
+
],
|
884
|
+
"metadata": {
|
885
|
+
"kernelspec": {
|
886
|
+
"display_name": "Python 3",
|
887
|
+
"language": "python",
|
888
|
+
"name": "python3"
|
889
|
+
},
|
890
|
+
"language_info": {
|
891
|
+
"codemirror_mode": {
|
892
|
+
"name": "ipython",
|
893
|
+
"version": 3
|
894
|
+
},
|
895
|
+
"file_extension": ".py",
|
896
|
+
"mimetype": "text/x-python",
|
897
|
+
"name": "python",
|
898
|
+
"nbconvert_exporter": "python",
|
899
|
+
"pygments_lexer": "ipython3",
|
900
|
+
"version": "3.8.8"
|
901
|
+
}
|
902
|
+
},
|
903
|
+
"nbformat": 4,
|
904
|
+
"nbformat_minor": 5
|
905
|
+
}
|