noshot 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/__init__.py +1 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(A) Breadth First Search.ipynb +112 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(B) Depth First Search.ipynb +111 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(C) Uniform Cost Search.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(D) Depth Limites Search.ipynb +115 -0
- noshot/data/AIDS CN NLP/AIDS/1. Implement Basic Search Strategies/(E) Iterative Deepening DFS.ipynb +123 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/2_ANOVA.csv +769 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA (Repeated Measure).ipynb +126 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/One Way ANOVA.ipynb +134 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Sample 1 Way ANOVA Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/Two Way ANOVA.ipynb +138 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/reaction_time.csv +5 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sample_data.csv +16 -0
- noshot/data/AIDS CN NLP/AIDS/10. ANOVA/sleep_deprivation.csv +4 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/3_Linear.csv +4802 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression LAB.ipynb +113 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression New- sklearn.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/Linear Regression.ipynb +148 -0
- noshot/data/AIDS CN NLP/AIDS/11. Linear Regression/house_rate.csv +22 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression New- sklearn.ipynb +128 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/Logistic Regression.ipynb +145 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/default.csv +1001 -0
- noshot/data/AIDS CN NLP/AIDS/12. Logistic Regression/hours_scores_records.csv +101 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(A) Astar.ipynb +256 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(B) IDAstar.ipynb +157 -0
- noshot/data/AIDS CN NLP/AIDS/2. Implement A Star And MA Star/(C) SMAstar.ipynb +178 -0
- noshot/data/AIDS CN NLP/AIDS/3. Genetic Algorithm/Genetic.ipynb +95 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Simulated Annealing.ipynb +74 -0
- noshot/data/AIDS CN NLP/AIDS/4. Simulated Annealing/Sudoku Simulated Annealing.ipynb +103 -0
- noshot/data/AIDS CN NLP/AIDS/5. Alpha Beta Pruning/AlphaBetaPruning.ipynb +182 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(A) CSP House Allocation.ipynb +120 -0
- noshot/data/AIDS CN NLP/AIDS/6. Consraint Satisfaction Problems (CSP)/(B) CSP Map Coloring.ipynb +125 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/Random Sampling.ipynb +73 -0
- noshot/data/AIDS CN NLP/AIDS/7. Random Sampling/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test Hash Function.ipynb +141 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/Z Test.ipynb +151 -0
- noshot/data/AIDS CN NLP/AIDS/8. Z Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/1_heart.csv +304 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Independent T Test.ipynb +119 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/Paired T Test.ipynb +118 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test Hash Function.ipynb +142 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/T Test.ipynb +158 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/height_weight_bmi.csv +8389 -0
- noshot/data/AIDS CN NLP/AIDS/9. T Test/iq_test.csv +0 -0
- noshot/data/AIDS CN NLP/AIDS/Others (AllinOne)/All In One.ipynb +4581 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/chat.java +81 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/1. Chat Application/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/LAN.tcl +65 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/analysis.awk +44 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/10. Ethernet LAN IEEE 802.3/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/complexdcf.tcl +229 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/11. Wireless LAN IEEE 802.11/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/file_to_send.txt +2 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/filetransfer.java +119 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/2. File Transfer/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/3. RMI (Remote Method Invocation)/rmi.java +56 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.awk +25 -0
- noshot/data/AIDS CN NLP/CN/4. Wired Network/wired.tcl +81 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.awk +27 -0
- noshot/data/AIDS CN NLP/CN/5. Wireless Network/wireless.tcl +153 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/sack.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Sack And Vegas/vegas.tcl +86 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/analysis.awk +28 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/reno.tcl +78 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Congestion Control/Tahoe And Reno/tahoe.tcl +79 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/analysis.awk +27 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/flow.tcl +163 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/TCP Flow Control/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/6. TCP Flow And Congestion Control/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/DV.tcl +111 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/LS.tcl +106 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/analysis.awk +36 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/7. Link State And Distance Vector Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/analysis.awk +20 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/broadcast.tcl +76 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/multicast.tcl +103 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/8. Multicast And Broadcast Routing/procedure.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/DHCP.java +125 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/output.png +0 -0
- noshot/data/AIDS CN NLP/CN/9. DHCP/procedure.png +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/1-Prereqs.py +18 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-Chi2test.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/2-T-test.py +79 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/3-WSD-nb.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/4-Hindle-Rooth.py +53 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/5-HMM-Trellis.py +82 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/6-HMM-Viterbi.py +16 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/7-PCFG-parsetree.py +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Chi2test.ipynb +285 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Hindle-Rooth.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/PCFG.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Prereqs.ipynb +131 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/T test.ipynb +252 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/TFIDF BOW.ipynb +171 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Trellis.ipynb +244 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/WSD.ipynb +645 -0
- noshot/data/AIDS CN NLP/NLP/NLP 1/Word2Vec.ipynb +93 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/tokenizer.ipynb +370 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab01(tokenizer)/training_tokenizer.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/exp0.ipynb +274 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/lab2.ipynb +905 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/test.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab02(stemming)/tokenizing.ipynb +272 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/lab3.ipynb +549 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab03(parse-tree)/nlp.txt +1 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/Lab4-NLP-Exp-2.ipynb +817 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab04(collocation)/collocation.ipynb +332 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/NLP-Lab-5-Exp3.ipynb +231 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab05(WSD)/word-sense-disambiguation.ipynb +507 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab06(additional-exercise)/lab6.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP Exp 4.ipynb +255 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab07(HMM,Viterbi)/NLP_Exp_5.ipynb +159 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab08(PCFG)/PCFG.ipynb +282 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/Lab 9 - MLP classifier.ipynb +670 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/MLP-alternative-code.ipynb +613 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab09-Hindle-rooth&MLP/hindle-rooth-algorithm.ipynb +74 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab10(LSTM)/Lab_10_Text_generator_using_LSTM.ipynb +480 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Machine-translation.ipynb +445 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/Viterbi-PCFG.ipynb +105 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/corpora_tools.py +87 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/data_utils.py +11 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab11(Viterbi-PCFG,Machine-translation)/train_translator.py +83 -0
- noshot/data/AIDS CN NLP/NLP/NLP 2/Lab12(Information-Extraction)/Information_Extraction.ipynb +201 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backtrack-without-Verbitri.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Backward-Procedure.ipynb +597 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Bag_of.ipynb +1422 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/CYK-algorithm.ipynb +1067 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Forward-Procedure.ipynb +477 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/LSTM.ipynb +1290 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 10 - Text generator using LSTM.ipynb +1461 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/Lab 11 NMT.ipynb +2307 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-4.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/NLP-LAB-5.ipynb +216 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/abc.txt +6 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-1-nltk.ipynb +711 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/ex-2-nlp.ipynb +267 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/exp8&9.ipynb +305 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/hind.ipynb +287 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/lab66.ipynb +752 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/leb_3.ipynb +612 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/naive_bayes_classifier.pkl +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_1.ipynb +3008 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlp_leb_2.ipynb +3095 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nlplab-9.ipynb +295 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/nltk-ex-4.ipynb +506 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text1.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text2.txt +8 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/text3.txt +48 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/translation-rnn.ipynb +812 -0
- noshot/data/AIDS CN NLP/NLP/NLP 3/word2vector.ipynb +173 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Backward Procedure Algorithm.ipynb +179 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Chi Square Collocation.ipynb +208 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Collocation (T test).ipynb +188 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Experiment 1.ipynb +437 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Forward Procedure Algorithm.ipynb +132 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Hindle Rooth.ipynb +414 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/MachineTranslation.ipynb +368 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using MLPClassifier.ipynb +86 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Multi Layer Perceptron using Tensorflow.ipynb +112 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/PCFG Inside Probability.ipynb +451 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Text Generation using LSTM.ipynb +297 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Viterbi.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 4/Word Sense Disambiguation.ipynb +335 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/10.Text Generation using LSTM.ipynb +316 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/11.Machine Translation.ipynb +868 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/2.T and Chi2 Test.ipynb +204 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/3.Word Sense Diambiguation.ipynb +234 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/4.Hinddle and Rooth.ipynb +128 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/5.Forward and Backward.ipynb +149 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/6.Viterbi.ipynb +111 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG Parse Tree.ipynb +134 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/7.PCFG using cyk.ipynb +101 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/8.Bag of words and TF-IDF.ipynb +310 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/9.Word2Vector.ipynb +78 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/NLP ALL In One.ipynb +2619 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample1.txt +15 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/sample2.txt +4 -0
- noshot/data/AIDS CN NLP/NLP/NLP 5/word2vec_model.bin +0 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/1. Tokenize, Tagging, NER, Parse Tree.ipynb +312 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/2. T Test and Chi2 Test.ipynb +185 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/3. Naive Bayes WSD.ipynb +199 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/4. Hinddle and Rooth.ipynb +151 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/5 and 6 FWD, BWD, Viterbi.ipynb +164 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/7. PCFG using CYK.ipynb +383 -0
- noshot/data/AIDS CN NLP/NLP/NLP 6/8. BOW and TF-IDF.ipynb +252 -0
- noshot/data/AIDS CN NLP/Ubuntu CN Lab.iso +0 -0
- noshot/main.py +47 -0
- noshot-0.1.0.dist-info/LICENSE.txt +21 -0
- noshot-0.1.0.dist-info/METADATA +65 -0
- noshot-0.1.0.dist-info/RECORD +210 -0
- noshot-0.1.0.dist-info/WHEEL +5 -0
- noshot-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1422 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": 10,
|
6
|
+
"id": "d785cbc0-ca92-485c-9ddd-22f29c9e7f46",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [
|
9
|
+
{
|
10
|
+
"name": "stdout",
|
11
|
+
"output_type": "stream",
|
12
|
+
"text": [
|
13
|
+
"ability: 2\n",
|
14
|
+
"accounting: 1\n",
|
15
|
+
"accuracy: 2\n",
|
16
|
+
"accurate: 1\n",
|
17
|
+
"accurately: 1\n",
|
18
|
+
"across: 1\n",
|
19
|
+
"action: 1\n",
|
20
|
+
"actionable: 1\n",
|
21
|
+
"actions: 1\n",
|
22
|
+
"activities: 1\n",
|
23
|
+
"activity: 1\n",
|
24
|
+
"additionally: 1\n",
|
25
|
+
"addressed: 1\n",
|
26
|
+
"addressing: 2\n",
|
27
|
+
"adjusting: 1\n",
|
28
|
+
"advanced: 5\n",
|
29
|
+
"advancement: 1\n",
|
30
|
+
"advancing: 1\n",
|
31
|
+
"age: 1\n",
|
32
|
+
"ai: 34\n",
|
33
|
+
"alert: 1\n",
|
34
|
+
"algorithm: 6\n",
|
35
|
+
"algorithms: 3\n",
|
36
|
+
"allow: 1\n",
|
37
|
+
"allowing: 2\n",
|
38
|
+
"also: 2\n",
|
39
|
+
"amount: 2\n",
|
40
|
+
"analysis: 8\n",
|
41
|
+
"analytical: 1\n",
|
42
|
+
"analytics: 5\n",
|
43
|
+
"analyze: 5\n",
|
44
|
+
"analyzing: 3\n",
|
45
|
+
"anomaly: 1\n",
|
46
|
+
"application: 1\n",
|
47
|
+
"applications: 2\n",
|
48
|
+
"area: 1\n",
|
49
|
+
"artificial: 3\n",
|
50
|
+
"aspect: 1\n",
|
51
|
+
"ass: 1\n",
|
52
|
+
"automated: 1\n",
|
53
|
+
"automatically: 2\n",
|
54
|
+
"automating: 2\n",
|
55
|
+
"automation: 2\n",
|
56
|
+
"autonomous: 2\n",
|
57
|
+
"available: 1\n",
|
58
|
+
"aware: 1\n",
|
59
|
+
"bandwidth: 1\n",
|
60
|
+
"based: 2\n",
|
61
|
+
"becomes: 1\n",
|
62
|
+
"benefit: 1\n",
|
63
|
+
"bias: 4\n",
|
64
|
+
"brings: 1\n",
|
65
|
+
"brought: 1\n",
|
66
|
+
"business: 1\n",
|
67
|
+
"capabilities: 3\n",
|
68
|
+
"capability: 3\n",
|
69
|
+
"care: 1\n",
|
70
|
+
"centralized: 1\n",
|
71
|
+
"chain: 1\n",
|
72
|
+
"characteristic: 1\n",
|
73
|
+
"chart: 1\n",
|
74
|
+
"cleaning: 4\n",
|
75
|
+
"clinical: 1\n",
|
76
|
+
"closer: 1\n",
|
77
|
+
"cloud: 1\n",
|
78
|
+
"combination: 1\n",
|
79
|
+
"competitive: 1\n",
|
80
|
+
"complex: 2\n",
|
81
|
+
"component: 1\n",
|
82
|
+
"computer: 3\n",
|
83
|
+
"computing: 2\n",
|
84
|
+
"concerns: 3\n",
|
85
|
+
"conclusion: 1\n",
|
86
|
+
"constitutes: 1\n",
|
87
|
+
"continues: 1\n",
|
88
|
+
"continuously: 1\n",
|
89
|
+
"convergence: 1\n",
|
90
|
+
"core: 1\n",
|
91
|
+
"correct: 1\n",
|
92
|
+
"course: 1\n",
|
93
|
+
"create: 1\n",
|
94
|
+
"credit: 1\n",
|
95
|
+
"critical: 1\n",
|
96
|
+
"crucial: 3\n",
|
97
|
+
"customer: 2\n",
|
98
|
+
"cybersecurity: 1\n",
|
99
|
+
"dashboard: 1\n",
|
100
|
+
"data: 62\n",
|
101
|
+
"datasets: 3\n",
|
102
|
+
"decision: 11\n",
|
103
|
+
"decisions: 2\n",
|
104
|
+
"deep: 1\n",
|
105
|
+
"deeper: 1\n",
|
106
|
+
"demand: 2\n",
|
107
|
+
"deployed: 2\n",
|
108
|
+
"designed: 1\n",
|
109
|
+
"detect: 3\n",
|
110
|
+
"developing: 1\n",
|
111
|
+
"development: 1\n",
|
112
|
+
"device: 1\n",
|
113
|
+
"diagnosis: 1\n",
|
114
|
+
"digital: 1\n",
|
115
|
+
"discriminatory: 1\n",
|
116
|
+
"disruptions: 1\n",
|
117
|
+
"diverse: 1\n",
|
118
|
+
"drive: 2\n",
|
119
|
+
"driven: 7\n",
|
120
|
+
"dynamically: 1\n",
|
121
|
+
"early: 1\n",
|
122
|
+
"edge: 3\n",
|
123
|
+
"efficiency: 3\n",
|
124
|
+
"effort: 1\n",
|
125
|
+
"efforts: 1\n",
|
126
|
+
"embracing: 1\n",
|
127
|
+
"enabled: 2\n",
|
128
|
+
"enabling: 6\n",
|
129
|
+
"enhanced: 1\n",
|
130
|
+
"enhancing: 4\n",
|
131
|
+
"ensure: 2\n",
|
132
|
+
"ensuring: 2\n",
|
133
|
+
"entity: 1\n",
|
134
|
+
"errors: 1\n",
|
135
|
+
"essay: 1\n",
|
136
|
+
"essential: 2\n",
|
137
|
+
"established: 1\n",
|
138
|
+
"ethical: 5\n",
|
139
|
+
"evaluate: 1\n",
|
140
|
+
"evolve: 1\n",
|
141
|
+
"example: 3\n",
|
142
|
+
"executive: 1\n",
|
143
|
+
"explainable: 1\n",
|
144
|
+
"explore: 1\n",
|
145
|
+
"explores: 1\n",
|
146
|
+
"extracting: 4\n",
|
147
|
+
"facilitated: 2\n",
|
148
|
+
"facilitating: 1\n",
|
149
|
+
"fairness: 1\n",
|
150
|
+
"far: 1\n",
|
151
|
+
"features: 1\n",
|
152
|
+
"field: 1\n",
|
153
|
+
"fields: 1\n",
|
154
|
+
"filter: 1\n",
|
155
|
+
"finance: 2\n",
|
156
|
+
"financial: 2\n",
|
157
|
+
"fluctuations: 1\n",
|
158
|
+
"focus: 3\n",
|
159
|
+
"forecast: 1\n",
|
160
|
+
"forecasting: 1\n",
|
161
|
+
"form: 1\n",
|
162
|
+
"format: 1\n",
|
163
|
+
"fraudulent: 2\n",
|
164
|
+
"full: 1\n",
|
165
|
+
"fundamentally: 1\n",
|
166
|
+
"future: 1\n",
|
167
|
+
"generate: 1\n",
|
168
|
+
"generated: 3\n",
|
169
|
+
"generation: 1\n",
|
170
|
+
"govern: 1\n",
|
171
|
+
"graphical: 1\n",
|
172
|
+
"graphs: 1\n",
|
173
|
+
"guideline: 1\n",
|
174
|
+
"handle: 1\n",
|
175
|
+
"harness: 1\n",
|
176
|
+
"healthcare: 4\n",
|
177
|
+
"hidden: 1\n",
|
178
|
+
"higher: 1\n",
|
179
|
+
"highlighting: 2\n",
|
180
|
+
"historical: 1\n",
|
181
|
+
"human: 4\n",
|
182
|
+
"identify: 3\n",
|
183
|
+
"identifying: 2\n",
|
184
|
+
"image: 1\n",
|
185
|
+
"immediate: 1\n",
|
186
|
+
"impact: 3\n",
|
187
|
+
"impacted: 1\n",
|
188
|
+
"implementing: 1\n",
|
189
|
+
"important: 1\n",
|
190
|
+
"impossible: 1\n",
|
191
|
+
"improve: 1\n",
|
192
|
+
"improved: 2\n",
|
193
|
+
"improving: 1\n",
|
194
|
+
"inadvertently: 1\n",
|
195
|
+
"include: 1\n",
|
196
|
+
"inconsistencies: 1\n",
|
197
|
+
"increasingly: 1\n",
|
198
|
+
"industrial: 1\n",
|
199
|
+
"industries: 1\n",
|
200
|
+
"industry: 1\n",
|
201
|
+
"inform: 1\n",
|
202
|
+
"information: 1\n",
|
203
|
+
"informed: 2\n",
|
204
|
+
"innovation: 2\n",
|
205
|
+
"insight: 3\n",
|
206
|
+
"insights: 1\n",
|
207
|
+
"instance: 1\n",
|
208
|
+
"integration: 1\n",
|
209
|
+
"intelligence: 5\n",
|
210
|
+
"interaction: 1\n",
|
211
|
+
"interactive: 2\n",
|
212
|
+
"introduction: 1\n",
|
213
|
+
"intuitive: 1\n",
|
214
|
+
"invaluable: 1\n",
|
215
|
+
"inventory: 2\n",
|
216
|
+
"investment: 2\n",
|
217
|
+
"involves: 1\n",
|
218
|
+
"involving: 1\n",
|
219
|
+
"issue: 1\n",
|
220
|
+
"key: 2\n",
|
221
|
+
"knowledge: 1\n",
|
222
|
+
"language: 5\n",
|
223
|
+
"large: 1\n",
|
224
|
+
"latency: 1\n",
|
225
|
+
"lead: 1\n",
|
226
|
+
"leading: 2\n",
|
227
|
+
"learn: 1\n",
|
228
|
+
"learning: 4\n",
|
229
|
+
"level: 1\n",
|
230
|
+
"levels: 1\n",
|
231
|
+
"leveraging: 2\n",
|
232
|
+
"like: 3\n",
|
233
|
+
"locally: 1\n",
|
234
|
+
"machine: 3\n",
|
235
|
+
"make: 3\n",
|
236
|
+
"making: 6\n",
|
237
|
+
"management: 2\n",
|
238
|
+
"manner: 1\n",
|
239
|
+
"manual: 1\n",
|
240
|
+
"manually: 1\n",
|
241
|
+
"many: 1\n",
|
242
|
+
"market: 1\n",
|
243
|
+
"marketing: 2\n",
|
244
|
+
"massive: 1\n",
|
245
|
+
"meaningful: 1\n",
|
246
|
+
"media: 1\n",
|
247
|
+
"medical: 1\n",
|
248
|
+
"medium: 1\n",
|
249
|
+
"method: 1\n",
|
250
|
+
"mimic: 1\n",
|
251
|
+
"missing: 1\n",
|
252
|
+
"mitigate: 1\n",
|
253
|
+
"ml: 1\n",
|
254
|
+
"model: 3\n",
|
255
|
+
"models: 2\n",
|
256
|
+
"monitoring: 2\n",
|
257
|
+
"more: 1\n",
|
258
|
+
"multiple: 1\n",
|
259
|
+
"natural: 4\n",
|
260
|
+
"navigation: 1\n",
|
261
|
+
"networks: 1\n",
|
262
|
+
"neural: 1\n",
|
263
|
+
"new: 1\n",
|
264
|
+
"nlp: 6\n",
|
265
|
+
"note: 1\n",
|
266
|
+
"numerous: 2\n",
|
267
|
+
"object: 1\n",
|
268
|
+
"often: 2\n",
|
269
|
+
"one: 1\n",
|
270
|
+
"operational: 1\n",
|
271
|
+
"optimal: 1\n",
|
272
|
+
"optimize: 3\n",
|
273
|
+
"organization: 3\n",
|
274
|
+
"outcome: 1\n",
|
275
|
+
"outcomes: 2\n",
|
276
|
+
"outliers: 1\n",
|
277
|
+
"papers: 1\n",
|
278
|
+
"parameter: 1\n",
|
279
|
+
"particularly: 3\n",
|
280
|
+
"patient: 2\n",
|
281
|
+
"pattern: 1\n",
|
282
|
+
"patterns: 1\n",
|
283
|
+
"perform: 1\n",
|
284
|
+
"perpetuate: 1\n",
|
285
|
+
"personalize: 1\n",
|
286
|
+
"personalized: 1\n",
|
287
|
+
"plans: 1\n",
|
288
|
+
"platform: 1\n",
|
289
|
+
"portfolios: 1\n",
|
290
|
+
"portion: 2\n",
|
291
|
+
"potential: 2\n",
|
292
|
+
"powered: 4\n",
|
293
|
+
"powerful: 1\n",
|
294
|
+
"predict: 3\n",
|
295
|
+
"predictive: 6\n",
|
296
|
+
"preparation: 4\n",
|
297
|
+
"present: 2\n",
|
298
|
+
"primary: 1\n",
|
299
|
+
"process: 5\n",
|
300
|
+
"processes: 3\n",
|
301
|
+
"processing: 13\n",
|
302
|
+
"profound: 1\n",
|
303
|
+
"project: 1\n",
|
304
|
+
"provide: 1\n",
|
305
|
+
"providing: 1\n",
|
306
|
+
"quick: 1\n",
|
307
|
+
"quickly: 1\n",
|
308
|
+
"raise: 1\n",
|
309
|
+
"reaching: 1\n",
|
310
|
+
"real: 7\n",
|
311
|
+
"recent: 1\n",
|
312
|
+
"recognition: 1\n",
|
313
|
+
"recommend: 1\n",
|
314
|
+
"reduces: 1\n",
|
315
|
+
"reducing: 2\n",
|
316
|
+
"regression: 1\n",
|
317
|
+
"regulation: 1\n",
|
318
|
+
"relationship: 1\n",
|
319
|
+
"relevant: 1\n",
|
320
|
+
"relying: 1\n",
|
321
|
+
"representations: 1\n",
|
322
|
+
"representative: 1\n",
|
323
|
+
"required: 1\n",
|
324
|
+
"requirements: 1\n",
|
325
|
+
"research: 2\n",
|
326
|
+
"respect: 1\n",
|
327
|
+
"response: 1\n",
|
328
|
+
"responsible: 1\n",
|
329
|
+
"retail: 1\n",
|
330
|
+
"revolutionized: 1\n",
|
331
|
+
"right: 1\n",
|
332
|
+
"risk: 1\n",
|
333
|
+
"risks: 1\n",
|
334
|
+
"safety: 1\n",
|
335
|
+
"sale: 1\n",
|
336
|
+
"scenario: 1\n",
|
337
|
+
"scenarios: 1\n",
|
338
|
+
"science: 15\n",
|
339
|
+
"scientist: 1\n",
|
340
|
+
"second: 1\n",
|
341
|
+
"sector: 1\n",
|
342
|
+
"seeking: 1\n",
|
343
|
+
"sensors: 1\n",
|
344
|
+
"sentiment: 2\n",
|
345
|
+
"servers: 1\n",
|
346
|
+
"significant: 3\n",
|
347
|
+
"significantly: 2\n",
|
348
|
+
"similarly: 1\n",
|
349
|
+
"social: 2\n",
|
350
|
+
"societal: 1\n",
|
351
|
+
"sophisticated: 1\n",
|
352
|
+
"source: 1\n",
|
353
|
+
"sources: 1\n",
|
354
|
+
"speed: 1\n",
|
355
|
+
"spent: 1\n",
|
356
|
+
"split: 1\n",
|
357
|
+
"stakeholder: 1\n",
|
358
|
+
"stay: 1\n",
|
359
|
+
"step: 1\n",
|
360
|
+
"strategies: 2\n",
|
361
|
+
"stream: 2\n",
|
362
|
+
"streaming: 1\n",
|
363
|
+
"structured: 1\n",
|
364
|
+
"struggle: 1\n",
|
365
|
+
"subfield: 1\n",
|
366
|
+
"suitable: 1\n",
|
367
|
+
"summarization: 1\n",
|
368
|
+
"supply: 1\n",
|
369
|
+
"support: 2\n",
|
370
|
+
"synergy: 1\n",
|
371
|
+
"system: 3\n",
|
372
|
+
"task: 2\n",
|
373
|
+
"tasks: 1\n",
|
374
|
+
"technique: 3\n",
|
375
|
+
"techniques: 1\n",
|
376
|
+
"technology: 1\n",
|
377
|
+
"text: 3\n",
|
378
|
+
"time: 10\n",
|
379
|
+
"today: 2\n",
|
380
|
+
"together: 1\n",
|
381
|
+
"tool: 2\n",
|
382
|
+
"trading: 1\n",
|
383
|
+
"traditional: 1\n",
|
384
|
+
"training: 2\n",
|
385
|
+
"transformation: 1\n",
|
386
|
+
"transformed: 4\n",
|
387
|
+
"transforming: 2\n",
|
388
|
+
"transparent: 1\n",
|
389
|
+
"treatment: 1\n",
|
390
|
+
"trees: 1\n",
|
391
|
+
"trend: 4\n",
|
392
|
+
"trends: 1\n",
|
393
|
+
"triggering: 1\n",
|
394
|
+
"uncover: 1\n",
|
395
|
+
"understand: 1\n",
|
396
|
+
"understanding: 1\n",
|
397
|
+
"unfair: 1\n",
|
398
|
+
"unprecedented: 1\n",
|
399
|
+
"unstructured: 2\n",
|
400
|
+
"use: 2\n",
|
401
|
+
"user: 1\n",
|
402
|
+
"valuable: 3\n",
|
403
|
+
"value: 1\n",
|
404
|
+
"values: 1\n",
|
405
|
+
"various: 2\n",
|
406
|
+
"vast: 2\n",
|
407
|
+
"vehicles: 2\n",
|
408
|
+
"videos: 1\n",
|
409
|
+
"vision: 2\n",
|
410
|
+
"visualization: 7\n",
|
411
|
+
"vital: 1\n",
|
412
|
+
"way: 1\n",
|
413
|
+
"within: 1\n",
|
414
|
+
"without: 1\n",
|
415
|
+
"workflow: 1\n",
|
416
|
+
"world: 1\n",
|
417
|
+
"would: 1\n",
|
418
|
+
"years: 1\n"
|
419
|
+
]
|
420
|
+
}
|
421
|
+
],
|
422
|
+
"source": [
|
423
|
+
"import numpy as np\n",
|
424
|
+
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
425
|
+
"\n",
|
426
|
+
"# Load the text data from the file\n",
|
427
|
+
"with open('text3.txt', 'r') as f:\n",
|
428
|
+
" text_data = f.read()\n",
|
429
|
+
"\n",
|
430
|
+
"# Tokenize the text data\n",
|
431
|
+
"tokens = text_data.split()\n",
|
432
|
+
"\n",
|
433
|
+
"# Remove stopwords\n",
|
434
|
+
"from nltk.corpus import stopwords\n",
|
435
|
+
"stop_words = set(stopwords.words('english'))\n",
|
436
|
+
"tokens = [word for word in tokens if word.lower() not in stop_words]\n",
|
437
|
+
"\n",
|
438
|
+
"# Perform stemming or lemmatization (optional)\n",
|
439
|
+
"from nltk.stem import WordNetLemmatizer\n",
|
440
|
+
"lemmatizer = WordNetLemmatizer()\n",
|
441
|
+
"tokens = [lemmatizer.lemmatize(word) for word in tokens]\n",
|
442
|
+
"\n",
|
443
|
+
"# Join the tokens back into a string\n",
|
444
|
+
"text_data = ' '.join(tokens)\n",
|
445
|
+
"\n",
|
446
|
+
"# Create a CountVectorizer object\n",
|
447
|
+
"vectorizer = CountVectorizer()\n",
|
448
|
+
"\n",
|
449
|
+
"# Fit the vectorizer to the text data and transform it into a matrix\n",
|
450
|
+
"X = vectorizer.fit_transform([text_data])\n",
|
451
|
+
"\n",
|
452
|
+
"# Get the feature names (i.e., the unique words in the document)\n",
|
453
|
+
"feature_names = vectorizer.get_feature_names_out() # Use get_feature_names_out() instead\n",
|
454
|
+
"\n",
|
455
|
+
"# Print the feature names and their corresponding frequencies\n",
|
456
|
+
"for feature, freq in zip(feature_names, X.toarray()[0]):\n",
|
457
|
+
" print(f\"{feature}: {freq}\")"
|
458
|
+
]
|
459
|
+
},
|
460
|
+
{
|
461
|
+
"cell_type": "code",
|
462
|
+
"execution_count": 6,
|
463
|
+
"id": "46eeb633-6817-4852-a61e-7eccc1f018db",
|
464
|
+
"metadata": {},
|
465
|
+
"outputs": [
|
466
|
+
{
|
467
|
+
"name": "stdout",
|
468
|
+
"output_type": "stream",
|
469
|
+
"text": [
|
470
|
+
"Epoch 1/10\n",
|
471
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 160ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
472
|
+
"Epoch 2/10\n",
|
473
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
474
|
+
"Epoch 3/10\n",
|
475
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
476
|
+
"Epoch 4/10\n",
|
477
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
478
|
+
"Epoch 5/10\n",
|
479
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
480
|
+
"Epoch 6/10\n",
|
481
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
482
|
+
"Epoch 7/10\n",
|
483
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
484
|
+
"Epoch 8/10\n",
|
485
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
486
|
+
"Epoch 9/10\n",
|
487
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
488
|
+
"Epoch 10/10\n",
|
489
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
490
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 19ms/step - accuracy: 1.0000 - loss: 0.0000e+00\n",
|
491
|
+
"Bag of Words model accuracy: 1.000\n",
|
492
|
+
"Epoch 1/10\n"
|
493
|
+
]
|
494
|
+
},
|
495
|
+
{
|
496
|
+
"ename": "ValueError",
|
497
|
+
"evalue": "Exception encountered when calling Sequential.call().\n\n\u001b[1mInput 0 of layer \"dense_6\" is incompatible with the layer: expected axis -1 of input shape to have value 5000, but received input with shape (None, 374)\u001b[0m\n\nArguments received by Sequential.call():\n • inputs=tf.Tensor(shape=(None, 374), dtype=float32)\n • training=True\n • mask=None",
|
498
|
+
"output_type": "error",
|
499
|
+
"traceback": [
|
500
|
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
501
|
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
502
|
+
"Cell \u001b[1;32mIn[6], line 92\u001b[0m\n\u001b[0;32m 89\u001b[0m tfidf_model\u001b[38;5;241m.\u001b[39mcompile(loss\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcategorical_crossentropy\u001b[39m\u001b[38;5;124m'\u001b[39m, optimizer\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madam\u001b[39m\u001b[38;5;124m'\u001b[39m, metrics\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maccuracy\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 91\u001b[0m \u001b[38;5;66;03m# Train the model\u001b[39;00m\n\u001b[1;32m---> 92\u001b[0m tfidf_model\u001b[38;5;241m.\u001b[39mfit(tfidf_train, train_labels_onehot, epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m32\u001b[39m, validation_data\u001b[38;5;241m=\u001b[39m(tfidf_test, test_labels_onehot))\n\u001b[0;32m 94\u001b[0m \u001b[38;5;66;03m# Evaluate the model\u001b[39;00m\n\u001b[0;32m 95\u001b[0m loss, accuracy \u001b[38;5;241m=\u001b[39m tfidf_model\u001b[38;5;241m.\u001b[39mevaluate(tfidf_test, test_labels_onehot)\n",
|
503
|
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\keras\\src\\utils\\traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m 120\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 124\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
|
504
|
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\keras\\src\\layers\\input_spec.py:227\u001b[0m, in \u001b[0;36massert_input_compatibility\u001b[1;34m(input_spec, inputs, layer_name)\u001b[0m\n\u001b[0;32m 222\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m axis, value \u001b[38;5;129;01min\u001b[39;00m spec\u001b[38;5;241m.\u001b[39maxes\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m 223\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m shape[axis] \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m {\n\u001b[0;32m 224\u001b[0m value,\n\u001b[0;32m 225\u001b[0m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 226\u001b[0m }:\n\u001b[1;32m--> 227\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 228\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mInput \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minput_index\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m of layer \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlayer_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m is \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 229\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mincompatible with the layer: expected axis \u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 230\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mof input shape to have value \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvalue\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 231\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut received input with 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mshape \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 233\u001b[0m )\n\u001b[0;32m 234\u001b[0m \u001b[38;5;66;03m# Check shape.\u001b[39;00m\n\u001b[0;32m 235\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m spec\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
505
|
+
"\u001b[1;31mValueError\u001b[0m: Exception encountered when calling Sequential.call().\n\n\u001b[1mInput 0 of layer \"dense_6\" is incompatible with the layer: expected axis -1 of input shape to have value 5000, but received input with shape (None, 374)\u001b[0m\n\nArguments received by Sequential.call():\n • inputs=tf.Tensor(shape=(None, 374), dtype=float32)\n • training=True\n • mask=None"
|
506
|
+
]
|
507
|
+
}
|
508
|
+
],
|
509
|
+
"source": [
|
510
|
+
"import tensorflow as tf\n",
|
511
|
+
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
|
512
|
+
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
|
513
|
+
"from sklearn.model_selection import train_test_split\n",
|
514
|
+
"from sklearn.metrics import accuracy_score\n",
|
515
|
+
"\n",
|
516
|
+
"# Load the dataset\n",
|
517
|
+
"with open('text3.txt', 'r') as f:\n",
|
518
|
+
" text_data = f.readlines()\n",
|
519
|
+
"\n",
|
520
|
+
"# Split the data into input text and labels\n",
|
521
|
+
"# Map each distinct label string to an integer class index\n",
|
522
|
+
"text = []\n",
|
523
|
+
"labels = []\n",
|
524
|
+
"label_map = {} # Create a label map to store unique labels\n",
|
525
|
+
"label_index = 0 # Initialize a label index\n",
|
526
|
+
"for line in text_data:\n",
|
527
|
+
" parts = line.split('\\t')\n",
|
528
|
+
" if len(parts) > 1:\n",
|
529
|
+
" text.append(parts[0])\n",
|
530
|
+
" label = parts[1].strip()\n",
|
531
|
+
" else:\n",
|
532
|
+
" text.append(parts[0])\n",
|
533
|
+
" label = 'default_label' # Replace with your default label\n",
|
534
|
+
"\n",
|
535
|
+
" if label not in label_map:\n",
|
536
|
+
" label_map[label] = label_index\n",
|
537
|
+
" label_index += 1\n",
|
538
|
+
"\n",
|
539
|
+
" labels.append(label_map[label])\n",
|
540
|
+
"\n",
|
541
|
+
"# Split the data into training and testing sets\n",
|
542
|
+
"train_text, test_text, train_labels, test_labels = train_test_split(text, labels, test_size=0.2, random_state=42)\n",
|
543
|
+
"\n",
|
544
|
+
"# One-hot encode the labels\n",
|
545
|
+
"num_classes = len(label_map)\n",
|
546
|
+
"train_labels_onehot = tf.keras.utils.to_categorical(train_labels, num_classes)\n",
|
547
|
+
"test_labels_onehot = tf.keras.utils.to_categorical(test_labels, num_classes)\n",
|
548
|
+
"\n",
|
549
|
+
"# Create a tokenizer to split the text into words\n",
|
550
|
+
"tokenizer = Tokenizer()\n",
|
551
|
+
"tokenizer.fit_on_texts(train_text)\n",
|
552
|
+
"\n",
|
553
|
+
"# Convert the text data into sequences of words\n",
|
554
|
+
"train_sequences = tokenizer.texts_to_sequences(train_text)\n",
|
555
|
+
"test_sequences = tokenizer.texts_to_sequences(test_text)\n",
|
556
|
+
"\n",
|
557
|
+
"# Pad the sequences to have the same length\n",
|
558
|
+
"max_length = 200\n",
|
559
|
+
"padded_train = pad_sequences(train_sequences, maxlen=max_length)\n",
|
560
|
+
"padded_test = pad_sequences(test_sequences, maxlen=max_length)\n",
|
561
|
+
"\n",
|
562
|
+
"# One-hot encode the labels again (repeats the encoding step above; len(set(labels)) equals len(label_map))\n",
|
563
|
+
"num_classes = len(set(labels))\n",
|
564
|
+
"train_labels_onehot = tf.keras.utils.to_categorical(train_labels, num_classes)\n",
|
565
|
+
"test_labels_onehot = tf.keras.utils.to_categorical(test_labels, num_classes)\n",
|
566
|
+
"\n",
|
567
|
+
"# Define the Bag of Words model (Embedding -> Flatten -> Dense layers over the padded token-id sequences)\n",
|
568
|
+
"bow_model = tf.keras.models.Sequential([\n",
|
569
|
+
" tf.keras.layers.Embedding(len(tokenizer.word_index) + 1, 64, input_length=max_length),\n",
|
570
|
+
" tf.keras.layers.Flatten(),\n",
|
571
|
+
" tf.keras.layers.Dense(64, activation='relu'),\n",
|
572
|
+
" tf.keras.layers.Dense(num_classes, activation='softmax')\n",
|
573
|
+
"])\n",
|
574
|
+
"\n",
|
575
|
+
"# Compile the model\n",
|
576
|
+
"bow_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
|
577
|
+
"\n",
|
578
|
+
"# Train the model\n",
|
579
|
+
"bow_model.fit(padded_train, train_labels_onehot, epochs=10, batch_size=32, validation_data=(padded_test, test_labels_onehot))\n",
|
580
|
+
"\n",
|
581
|
+
"# Evaluate the model\n",
|
582
|
+
"loss, accuracy = bow_model.evaluate(padded_test, test_labels_onehot)\n",
|
583
|
+
"print(f'Bag of Words model accuracy: {accuracy:.3f}')\n",
|
584
|
+
"\n",
|
585
|
+
"# Build the TF-IDF features: fit the vectorizer on the training text, then transform the test text\n",
|
586
|
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
587
|
+
"tfidf_vectorizer = TfidfVectorizer(max_features=5000)\n",
|
588
|
+
"tfidf_train = tfidf_vectorizer.fit_transform(train_text)\n",
|
589
|
+
"tfidf_test = tfidf_vectorizer.transform(test_text)\n",
|
590
|
+
"\n",
|
591
|
+
"# Define the TF-IDF model\n",
|
592
|
+
"tfidf_model = tf.keras.models.Sequential([\n",
|
593
|
+
" tf.keras.layers.Dense(64, activation='relu', input_shape=(5000,)),\n",
|
594
|
+
" tf.keras.layers.Dense(num_classes, activation='softmax')\n",
|
595
|
+
"])\n",
|
596
|
+
"\n",
|
597
|
+
"# Compile the model\n",
|
598
|
+
"tfidf_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
|
599
|
+
"\n",
|
600
|
+
"# Train the model\n",
|
601
|
+
"tfidf_model.fit(tfidf_train, train_labels_onehot, epochs=10, batch_size=32, validation_data=(tfidf_test, test_labels_onehot))\n",
|
602
|
+
"\n",
|
603
|
+
"# Evaluate the model\n",
|
604
|
+
"loss, accuracy = tfidf_model.evaluate(tfidf_test, test_labels_onehot)\n",
|
605
|
+
"print(f'TF-IDF model accuracy: {accuracy:.3f}')"
|
606
|
+
]
|
607
|
+
},
|
608
|
+
{
|
609
|
+
"cell_type": "code",
|
610
|
+
"execution_count": 11,
|
611
|
+
"id": "52b1dcc9-9060-430f-a3c1-6fa739ccd8a7",
|
612
|
+
"metadata": {},
|
613
|
+
"outputs": [
|
614
|
+
{
|
615
|
+
"name": "stdout",
|
616
|
+
"output_type": "stream",
|
617
|
+
"text": [
|
618
|
+
"Epoch 1/10\n",
|
619
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 154ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
620
|
+
"Epoch 2/10\n",
|
621
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
622
|
+
"Epoch 3/10\n",
|
623
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 35ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
624
|
+
"Epoch 4/10\n",
|
625
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
626
|
+
"Epoch 5/10\n",
|
627
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
628
|
+
"Epoch 6/10\n",
|
629
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 32ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
630
|
+
"Epoch 7/10\n",
|
631
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 34ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
632
|
+
"Epoch 8/10\n",
|
633
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 46ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
634
|
+
"Epoch 9/10\n",
|
635
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
636
|
+
"Epoch 10/10\n",
|
637
|
+
"\u001b[1m2/2\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 1.0000 - val_loss: 0.0000e+00\n",
|
638
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 20ms/step - accuracy: 1.0000 - loss: 0.0000e+00\n",
|
639
|
+
"Bag of Words model accuracy: 1.000\n",
|
640
|
+
"Epoch 1/10\n"
|
641
|
+
]
|
642
|
+
},
|
643
|
+
{
|
644
|
+
"ename": "InvalidArgumentError",
|
645
|
+
"evalue": "Graph execution error:\n\nDetected at node RaggedGather_1/RaggedGather defined at (most recent call last):\n<stack traces unavailable>\nindices[11] = 36 is not in [0, 36)\n\t [[{{node RaggedGather_1/RaggedGather}}]]\n\t [[IteratorGetNext]] [Op:__inference_one_step_on_iterator_8973]",
|
646
|
+
"output_type": "error",
|
647
|
+
"traceback": [
|
648
|
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
649
|
+
"\u001b[1;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)",
|
650
|
+
"Cell \u001b[1;32mIn[11], line 86\u001b[0m\n\u001b[0;32m 83\u001b[0m tfidf_model\u001b[38;5;241m.\u001b[39mcompile(loss\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcategorical_crossentropy\u001b[39m\u001b[38;5;124m'\u001b[39m, optimizer\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124madam\u001b[39m\u001b[38;5;124m'\u001b[39m, metrics\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maccuracy\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m 85\u001b[0m \u001b[38;5;66;03m# Train the model\u001b[39;00m\n\u001b[1;32m---> 86\u001b[0m tfidf_model\u001b[38;5;241m.\u001b[39mfit(tfidf_train, train_labels_onehot, epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m32\u001b[39m, validation_data\u001b[38;5;241m=\u001b[39m(tfidf_test, test_labels_onehot))\n\u001b[0;32m 88\u001b[0m \u001b[38;5;66;03m# Evaluate the model\u001b[39;00m\n\u001b[0;32m 89\u001b[0m loss, accuracy \u001b[38;5;241m=\u001b[39m tfidf_model\u001b[38;5;241m.\u001b[39mevaluate(tfidf_test, test_labels_onehot)\n",
|
651
|
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\keras\\src\\utils\\traceback_utils.py:122\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m 120\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[0;32m 121\u001b[0m \u001b[38;5;66;03m# `keras.config.disable_traceback_filtering()`\u001b[39;00m\n\u001b[1;32m--> 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 123\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 124\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n",
|
652
|
+
"File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\tensorflow\\python\\eager\\execute.py:53\u001b[0m, in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 51\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 52\u001b[0m ctx\u001b[38;5;241m.\u001b[39mensure_initialized()\n\u001b[1;32m---> 53\u001b[0m tensors \u001b[38;5;241m=\u001b[39m pywrap_tfe\u001b[38;5;241m.\u001b[39mTFE_Py_Execute(ctx\u001b[38;5;241m.\u001b[39m_handle, device_name, op_name,\n\u001b[0;32m 54\u001b[0m inputs, attrs, num_outputs)\n\u001b[0;32m 55\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m core\u001b[38;5;241m.\u001b[39m_NotOkStatusException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 56\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
653
|
+
"\u001b[1;31mInvalidArgumentError\u001b[0m: Graph execution error:\n\nDetected at node RaggedGather_1/RaggedGather defined at (most recent call last):\n<stack traces unavailable>\nindices[11] = 36 is not in [0, 36)\n\t [[{{node RaggedGather_1/RaggedGather}}]]\n\t [[IteratorGetNext]] [Op:__inference_one_step_on_iterator_8973]"
|
654
|
+
]
|
655
|
+
}
|
656
|
+
],
|
657
|
+
"source": [
|
658
|
+
"import tensorflow as tf\n",
|
659
|
+
"from tensorflow.keras.preprocessing.text import Tokenizer\n",
|
660
|
+
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
|
661
|
+
"from sklearn.model_selection import train_test_split\n",
|
662
|
+
"from sklearn.metrics import accuracy_score\n",
|
663
|
+
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
664
|
+
"\n",
|
665
|
+
"# Load the dataset\n",
|
666
|
+
"with open('text3.txt', 'r') as f:\n",
|
667
|
+
" text_data = f.readlines()\n",
|
668
|
+
"\n",
|
669
|
+
"# Split the data into input text and labels\n",
|
670
|
+
"text = []\n",
|
671
|
+
"labels = []\n",
|
672
|
+
"label_map = {} # Create a label map to store unique labels\n",
|
673
|
+
"label_index = 0 # Initialize a label index\n",
|
674
|
+
"for line in text_data:\n",
|
675
|
+
" parts = line.split('\\t')\n",
|
676
|
+
" if len(parts) > 1:\n",
|
677
|
+
" text.append(parts[0])\n",
|
678
|
+
" label = parts[1].strip()\n",
|
679
|
+
" else:\n",
|
680
|
+
" text.append(parts[0])\n",
|
681
|
+
" label = 'default_label' # Replace with your default label\n",
|
682
|
+
"\n",
|
683
|
+
" if label not in label_map:\n",
|
684
|
+
" label_map[label] = label_index\n",
|
685
|
+
" label_index += 1\n",
|
686
|
+
"\n",
|
687
|
+
" labels.append(label_map[label])\n",
|
688
|
+
"\n",
|
689
|
+
"# Split the data into training and testing sets\n",
|
690
|
+
"train_text, test_text, train_labels, test_labels = train_test_split(text, labels, test_size=0.2, random_state=42)\n",
|
691
|
+
"\n",
|
692
|
+
"# Create a tokenizer to split the text into words\n",
|
693
|
+
"tokenizer = Tokenizer()\n",
|
694
|
+
"tokenizer.fit_on_texts(train_text)\n",
|
695
|
+
"\n",
|
696
|
+
"# Convert the text data into sequences of words\n",
|
697
|
+
"train_sequences = tokenizer.texts_to_sequences(train_text)\n",
|
698
|
+
"test_sequences = tokenizer.texts_to_sequences(test_text)\n",
|
699
|
+
"\n",
|
700
|
+
"# Pad the sequences to have the same length\n",
|
701
|
+
"max_length = 200\n",
|
702
|
+
"padded_train = pad_sequences(train_sequences, maxlen=max_length)\n",
|
703
|
+
"padded_test = pad_sequences(test_sequences, maxlen=max_length)\n",
|
704
|
+
"\n",
|
705
|
+
"# One-hot encode the labels\n",
|
706
|
+
"num_classes = len(label_map)\n",
|
707
|
+
"train_labels_onehot = tf.keras.utils.to_categorical(train_labels, num_classes)\n",
|
708
|
+
"test_labels_onehot = tf.keras.utils.to_categorical(test_labels, num_classes)\n",
|
709
|
+
"\n",
|
710
|
+
"# Define the Bag of Words model (Embedding -> Flatten -> Dense layers over the padded token-id sequences)\n",
|
711
|
+
"bow_model = tf.keras.models.Sequential([\n",
|
712
|
+
" tf.keras.layers.Embedding(len(tokenizer.word_index) + 1, 64, input_length=max_length),\n",
|
713
|
+
" tf.keras.layers.Flatten(),\n",
|
714
|
+
" tf.keras.layers.Dense(64, activation='relu'),\n",
|
715
|
+
" tf.keras.layers.Dense(num_classes, activation='softmax')\n",
|
716
|
+
"])\n",
|
717
|
+
"\n",
|
718
|
+
"# Compile the model\n",
|
719
|
+
"bow_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
|
720
|
+
"\n",
|
721
|
+
"# Train the model\n",
|
722
|
+
"bow_model.fit(padded_train, train_labels_onehot, epochs=10, batch_size=32, validation_data=(padded_test, test_labels_onehot))\n",
|
723
|
+
"\n",
|
724
|
+
"# Evaluate the model\n",
|
725
|
+
"loss, accuracy = bow_model.evaluate(padded_test, test_labels_onehot)\n",
|
726
|
+
"print(f'Bag of Words model accuracy: {accuracy:.3f}')\n",
|
727
|
+
"\n",
|
728
|
+
"# Build the TF-IDF features: fit the vectorizer on the training text, then transform the test text\n",
|
729
|
+
"tfidf_vectorizer = TfidfVectorizer(max_features=374) # Adjusted max_features to match the shape of tfidf_train\n",
|
730
|
+
"tfidf_train = tfidf_vectorizer.fit_transform(train_text)\n",
|
731
|
+
"tfidf_test = tfidf_vectorizer.transform(test_text)\n",
|
732
|
+
"\n",
|
733
|
+
"# Define the TF-IDF model\n",
|
734
|
+
"tfidf_model = tf.keras.models.Sequential([\n",
|
735
|
+
" tf.keras.layers.Dense(64, activation='relu', input_shape=(374,)), # Adjusted input shape to match the shape of tfidf_train\n",
|
736
|
+
" tf.keras.layers.Dense(num_classes, activation='softmax')\n",
|
737
|
+
"])\n",
|
738
|
+
"\n",
|
739
|
+
"# Compile the model\n",
|
740
|
+
"tfidf_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
|
741
|
+
"\n",
|
742
|
+
"# Train the model\n",
|
743
|
+
"tfidf_model.fit(tfidf_train, train_labels_onehot, epochs=10, batch_size=32, validation_data=(tfidf_test, test_labels_onehot))\n",
|
744
|
+
"\n",
|
745
|
+
"# Evaluate the model\n",
|
746
|
+
"loss, accuracy = tfidf_model.evaluate(tfidf_test, test_labels_onehot)\n",
|
747
|
+
"print(f'TF-IDF model accuracy: {accuracy:.3f}')\n",
|
748
|
+
"\n",
|
749
|
+
"# Word2Vec Model\n",
|
750
|
+
"from gensim.models import Word2Vec\n",
|
751
|
+
"\n",
|
752
|
+
"# Load the dataset\n",
|
753
|
+
"with open('text3.txt', 'r') as f:\n",
|
754
|
+
" text_data = f.readlines()\n",
|
755
|
+
"\n",
|
756
|
+
"# Keep only the text field of each line (everything before the first tab)\n",
|
757
|
+
"text = [line.split('\\t')[0] for line in text_data]\n",
|
758
|
+
"\n",
|
759
|
+
"# Split the text into words\n",
|
760
|
+
"words = [line.split() for line in text]\n",
|
761
|
+
"\n",
|
762
|
+
"# Create a Word2Vec model\n",
|
763
|
+
"model = Word2Vec(words, size=100, window=5, min_count=1)\n",
|
764
|
+
"\n",
|
765
|
+
"# Get the word vectors\n",
|
766
|
+
"word_vectors = model.wv\n",
|
767
|
+
"\n",
|
768
|
+
"# Explore semantic similarity between words\n",
|
769
|
+
"print(word_vectors.similarity('word1', 'word2')) # Replace 'word1' and 'word2' with the words you want to compare"
|
770
|
+
]
|
771
|
+
},
|
772
|
+
{
|
773
|
+
"cell_type": "code",
|
774
|
+
"execution_count": 14,
|
775
|
+
"id": "87f18e07-76bd-4a44-8882-d8187e5c017a",
|
776
|
+
"metadata": {},
|
777
|
+
"outputs": [
|
778
|
+
{
|
779
|
+
"name": "stdout",
|
780
|
+
"output_type": "stream",
|
781
|
+
"text": [
|
782
|
+
"Vocabulary:\n",
|
783
|
+
" ['abil' 'account' 'accur' 'accuraci' 'across' 'action' 'activ' 'addit'\n",
|
784
|
+
" 'address' 'adjust' 'advanc' 'age' 'ai' 'alert' 'algorithm' 'allow' 'also'\n",
|
785
|
+
" 'amount' 'analysi' 'analyt' 'analyz' 'anomali' 'applic' 'area' 'artifici'\n",
|
786
|
+
" 'aspect' 'ass' 'autom' 'automat' 'autonom' 'avail' 'awar' 'bandwidth'\n",
|
787
|
+
" 'base' 'becom' 'benefit' 'bia' 'bring' 'brought' 'busi' 'capabl' 'care'\n",
|
788
|
+
" 'central' 'chain' 'characterist' 'chart' 'clean' 'clinic' 'closer'\n",
|
789
|
+
" 'cloud' 'combin' 'competit' 'complex' 'compon' 'comput' 'concern'\n",
|
790
|
+
" 'conclus' 'constitut' 'continu' 'converg' 'core' 'correct' 'cours'\n",
|
791
|
+
" 'creat' 'credit' 'critic' 'crucial' 'custom' 'cybersecur' 'dashboard'\n",
|
792
|
+
" 'data' 'dataset' 'decis' 'decision' 'deep' 'deeper' 'demand' 'deploy'\n",
|
793
|
+
" 'design' 'detect' 'develop' 'devic' 'diagnosi' 'digit' 'discriminatori'\n",
|
794
|
+
" 'disrupt' 'divers' 'drive' 'driven' 'dynam' 'earli' 'edg' 'effici'\n",
|
795
|
+
" 'effort' 'embrac' 'enabl' 'enhanc' 'ensur' 'entiti' 'error' 'essay'\n",
|
796
|
+
" 'essenti' 'establish' 'ethic' 'evalu' 'evolv' 'exampl' 'execut' 'explain'\n",
|
797
|
+
" 'explor' 'extract' 'facilit' 'fairness' 'far' 'featur' 'field' 'filter'\n",
|
798
|
+
" 'financ' 'financi' 'fluctuat' 'focu' 'forecast' 'form' 'format' 'fraudul'\n",
|
799
|
+
" 'full' 'fundament' 'futur' 'gener' 'govern' 'graph' 'graphic' 'guidelin'\n",
|
800
|
+
" 'handl' 'har' 'healthcar' 'hidden' 'higher' 'highlight' 'histor' 'human'\n",
|
801
|
+
" 'identifi' 'imag' 'immedi' 'impact' 'implement' 'import' 'imposs'\n",
|
802
|
+
" 'improv' 'inadvert' 'includ' 'inconsist' 'increasingli' 'industri'\n",
|
803
|
+
" 'inform' 'innov' 'insight' 'instanc' 'integr' 'intellig' 'interact'\n",
|
804
|
+
" 'introduct' 'intuit' 'invalu' 'inventori' 'invest' 'involv' 'issu' 'key'\n",
|
805
|
+
" 'knowledg' 'languag' 'larg' 'latenc' 'lead' 'learn' 'level' 'leverag'\n",
|
806
|
+
" 'like' 'local' 'machin' 'mak' 'make' 'manag' 'mani' 'manner' 'manual'\n",
|
807
|
+
" 'market' 'massiv' 'meaning' 'medic' 'medium' 'method' 'mimic' 'miss'\n",
|
808
|
+
" 'mitig' 'ml' 'model' 'monitor' 'multipl' 'natur' 'navig' 'network'\n",
|
809
|
+
" 'neural' 'new' 'nlp' 'note' 'numer' 'object' 'often' 'one' 'oper' 'optim'\n",
|
810
|
+
" 'organ' 'outcom' 'outlier' 'paper' 'paramet' 'particularli' 'patient'\n",
|
811
|
+
" 'pattern' 'perform' 'perpetu' 'person' 'plan' 'platform' 'portfolio'\n",
|
812
|
+
" 'portion' 'potenti' 'pow' 'power' 'predict' 'prepar' 'present' 'primari'\n",
|
813
|
+
" 'process' 'profound' 'project' 'provid' 'quick' 'quickli' 'rais' 'reach'\n",
|
814
|
+
" 'real' 'recent' 'recognit' 'recommend' 'reduc' 'regress' 'regul'\n",
|
815
|
+
" 'relationship' 'relev' 'reli' 'repres' 'represent' 'requir' 'research'\n",
|
816
|
+
" 'respect' 'respons' 'retail' 'revolution' 'right' 'risk' 'safeti' 'sale'\n",
|
817
|
+
" 'scenario' 'scienc' 'scientist' 'second' 'sector' 'seek' 'sensor'\n",
|
818
|
+
" 'sentiment' 'server' 'signific' 'significantli' 'similarli' 'social'\n",
|
819
|
+
" 'societ' 'sophist' 'sourc' 'speed' 'spent' 'split' 'stakehold' 'stay'\n",
|
820
|
+
" 'step' 'strategi' 'stream' 'structur' 'struggl' 'subfield' 'suitabl'\n",
|
821
|
+
" 'summar' 'suppli' 'support' 'synergi' 'system' 'task' 'techniqu'\n",
|
822
|
+
" 'technolog' 'text' 'tim' 'time' 'today' 'togeth' 'tool' 'trade' 'tradit'\n",
|
823
|
+
" 'train' 'transform' 'transpar' 'treatment' 'tree' 'trend' 'trigger'\n",
|
824
|
+
" 'uncov' 'understand' 'unfair' 'unpreced' 'unstructur' 'use' 'user' 'valu'\n",
|
825
|
+
" 'valuabl' 'variou' 'vast' 'vehicl' 'video' 'vision' 'visual' 'vital'\n",
|
826
|
+
" 'way' 'within' 'without' 'workflow' 'world' 'would' 'year']\n",
|
827
|
+
"\n",
|
828
|
+
"Bag of Words (Vectorization):\n",
|
829
|
+
" [[ 2 1 2 2 1 3 2 1 3 1 7 1 34 1 9 3 2 2 8 6 8 1 3 1\n",
|
830
|
+
" 3 1 1 5 2 2 1 1 1 2 1 1 4 1 1 1 6 1 1 1 1 1 4 1\n",
|
831
|
+
" 1 1 1 1 2 1 5 3 1 1 2 1 1 1 1 1 1 1 3 2 1 1 62 3\n",
|
832
|
+
" 9 4 1 1 2 2 1 3 2 1 1 1 1 1 1 2 7 1 1 3 3 2 1 8\n",
|
833
|
+
" 5 4 1 1 1 2 1 5 1 1 3 1 1 2 4 3 1 1 1 2 1 2 2 1\n",
|
834
|
+
" 3 2 1 1 2 1 1 1 5 1 1 1 1 1 1 4 1 1 2 1 4 5 1 1\n",
|
835
|
+
" 4 1 1 1 4 1 1 1 1 3 4 2 4 1 1 5 3 1 1 1 2 2 2 1\n",
|
836
|
+
" 2 1 5 1 1 3 5 2 2 3 1 3 4 5 2 1 1 2 3 1 1 1 2 1\n",
|
837
|
+
" 1 1 1 1 5 2 1 4 1 1 1 1 6 1 2 1 2 1 1 4 3 3 1 1\n",
|
838
|
+
" 1 3 2 2 1 1 2 1 1 1 2 2 2 3 9 4 2 1 21 1 1 2 1 1\n",
|
839
|
+
" 1 1 7 1 1 1 3 1 1 1 1 1 1 1 2 2 1 2 1 1 1 2 1 1\n",
|
840
|
+
" 2 15 1 1 1 1 1 2 1 3 2 1 2 1 1 2 1 1 1 1 1 1 2 3\n",
|
841
|
+
" 1 1 1 1 1 1 2 1 3 3 4 1 3 7 3 2 1 2 1 1 2 7 1 1\n",
|
842
|
+
" 1 5 1 1 2 1 1 2 2 1 2 3 2 2 2 1 2 7 1 1 1 1 1 1\n",
|
843
|
+
" 1 1]]\n"
|
844
|
+
]
|
845
|
+
},
|
846
|
+
{
|
847
|
+
"name": "stderr",
|
848
|
+
"output_type": "stream",
|
849
|
+
"text": [
|
850
|
+
"[nltk_data] Downloading package punkt to\n",
|
851
|
+
"[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
|
852
|
+
"[nltk_data] Package punkt is already up-to-date!\n",
|
853
|
+
"[nltk_data] Downloading package stopwords to\n",
|
854
|
+
"[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
|
855
|
+
"[nltk_data] Package stopwords is already up-to-date!\n",
|
856
|
+
"[nltk_data] Downloading package wordnet to\n",
|
857
|
+
"[nltk_data] C:\\Users\\admin\\AppData\\Roaming\\nltk_data...\n",
|
858
|
+
"[nltk_data] Package wordnet is already up-to-date!\n"
|
859
|
+
]
|
860
|
+
}
|
861
|
+
],
|
862
|
+
"source": [
|
863
|
+
"# Import required libraries\n",
|
864
|
+
"import nltk\n",
|
865
|
+
"from nltk.corpus import stopwords\n",
|
866
|
+
"from nltk.tokenize import word_tokenize\n",
|
867
|
+
"from nltk.stem import WordNetLemmatizer, PorterStemmer\n",
|
868
|
+
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
869
|
+
"import string\n",
|
870
|
+
"\n",
|
871
|
+
"# Download required NLTK resources (disabled: the calls below are wrapped in a triple-quoted string; remove the quotes on first run)\n",
|
872
|
+
"'''nltk.download('punkt')\n",
|
873
|
+
"nltk.download('stopwords')\n",
|
874
|
+
"nltk.download('wordnet')\n",
|
875
|
+
"'''\n",
|
876
|
+
"# Read the content of the file\n",
|
877
|
+
"file_path = 'text3.txt'\n",
|
878
|
+
"with open(file_path, 'r') as file:\n",
|
879
|
+
" text_data = file.read()\n",
|
880
|
+
"\n",
|
881
|
+
"# Initialize the stopwords, lemmatizer, and stemmer\n",
|
882
|
+
"stop_words = set(stopwords.words('english'))\n",
|
883
|
+
"lemmatizer = WordNetLemmatizer()\n",
|
884
|
+
"stemmer = PorterStemmer()\n",
|
885
|
+
"\n",
|
886
|
+
"# Function to preprocess text data\n",
|
887
|
+
"def preprocess_text(text):\n",
|
888
|
+
" # Tokenize the text\n",
|
889
|
+
" tokens = word_tokenize(text.lower())\n",
|
890
|
+
" \n",
|
891
|
+
" # Remove punctuation and stopwords, and perform lemmatization and stemming\n",
|
892
|
+
" processed_tokens = []\n",
|
893
|
+
" for word in tokens:\n",
|
894
|
+
" if word not in stop_words and word not in string.punctuation:\n",
|
895
|
+
" lemmatized_word = lemmatizer.lemmatize(word) # Lemmatization\n",
|
896
|
+
" stemmed_word = stemmer.stem(lemmatized_word) # Stemming\n",
|
897
|
+
" processed_tokens.append(stemmed_word)\n",
|
898
|
+
" \n",
|
899
|
+
" # Join tokens back into a single string\n",
|
900
|
+
" return ' '.join(processed_tokens)\n",
|
901
|
+
"\n",
|
902
|
+
"# Preprocess the text data\n",
|
903
|
+
"preprocessed_text = preprocess_text(text_data)\n",
|
904
|
+
"\n",
|
905
|
+
"# Initialize the CountVectorizer (Bag of Words model)\n",
|
906
|
+
"vectorizer = CountVectorizer()\n",
|
907
|
+
"\n",
|
908
|
+
"# Fit and transform the preprocessed text data to create the BoW model\n",
|
909
|
+
"X = vectorizer.fit_transform([preprocessed_text])\n",
|
910
|
+
"\n",
|
911
|
+
"# Extract the vocabulary (the feature names, in the same order as the columns of X)\n",
|
912
|
+
"vocabulary = vectorizer.get_feature_names_out()\n",
|
913
|
+
"\n",
|
914
|
+
"# Convert the BoW model to an array for easy viewing\n",
|
915
|
+
"bow_array = X.toarray()\n",
|
916
|
+
"\n",
|
917
|
+
"# Print the vocabulary and its corresponding vector\n",
|
918
|
+
"print(\"Vocabulary:\\n\", vocabulary)\n",
|
919
|
+
"print(\"\\nBag of Words (Vectorization):\\n\", bow_array)\n"
|
920
|
+
]
|
921
|
+
},
|
922
|
+
{
|
923
|
+
"cell_type": "code",
|
924
|
+
"execution_count": 15,
|
925
|
+
"id": "da811087-ddf6-45fb-af92-a8b852638723",
|
926
|
+
"metadata": {},
|
927
|
+
"outputs": [
|
928
|
+
{
|
929
|
+
"name": "stdout",
|
930
|
+
"output_type": "stream",
|
931
|
+
"text": [
|
932
|
+
"Bag of Words (BoW) Vocabulary:\n",
|
933
|
+
" ['abil' 'account' 'accur' 'accuraci' 'across' 'action' 'activ' 'addit'\n",
|
934
|
+
" 'address' 'adjust' 'advanc' 'age' 'ai' 'alert' 'algorithm' 'allow' 'also'\n",
|
935
|
+
" 'amount' 'analysi' 'analyt' 'analyz' 'anomali' 'applic' 'area' 'artifici'\n",
|
936
|
+
" 'aspect' 'ass' 'autom' 'automat' 'autonom' 'avail' 'awar' 'bandwidth'\n",
|
937
|
+
" 'base' 'becom' 'benefit' 'bia' 'bring' 'brought' 'busi' 'capabl' 'care'\n",
|
938
|
+
" 'central' 'chain' 'characterist' 'chart' 'clean' 'clinic' 'closer'\n",
|
939
|
+
" 'cloud' 'combin' 'competit' 'complex' 'compon' 'comput' 'concern'\n",
|
940
|
+
" 'conclus' 'constitut' 'continu' 'converg' 'core' 'correct' 'cours'\n",
|
941
|
+
" 'creat' 'credit' 'critic' 'crucial' 'custom' 'cybersecur' 'dashboard'\n",
|
942
|
+
" 'data' 'dataset' 'decis' 'decision' 'deep' 'deeper' 'demand' 'deploy'\n",
|
943
|
+
" 'design' 'detect' 'develop' 'devic' 'diagnosi' 'digit' 'discriminatori'\n",
|
944
|
+
" 'disrupt' 'divers' 'drive' 'driven' 'dynam' 'earli' 'edg' 'effici'\n",
|
945
|
+
" 'effort' 'embrac' 'enabl' 'enhanc' 'ensur' 'entiti' 'error' 'essay'\n",
|
946
|
+
" 'essenti' 'establish' 'ethic' 'evalu' 'evolv' 'exampl' 'execut' 'explain'\n",
|
947
|
+
" 'explor' 'extract' 'facilit' 'fairness' 'far' 'featur' 'field' 'filter'\n",
|
948
|
+
" 'financ' 'financi' 'fluctuat' 'focu' 'forecast' 'form' 'format' 'fraudul'\n",
|
949
|
+
" 'full' 'fundament' 'futur' 'gener' 'govern' 'graph' 'graphic' 'guidelin'\n",
|
950
|
+
" 'handl' 'har' 'healthcar' 'hidden' 'higher' 'highlight' 'histor' 'human'\n",
|
951
|
+
" 'identifi' 'imag' 'immedi' 'impact' 'implement' 'import' 'imposs'\n",
|
952
|
+
" 'improv' 'inadvert' 'includ' 'inconsist' 'increasingli' 'industri'\n",
|
953
|
+
" 'inform' 'innov' 'insight' 'instanc' 'integr' 'intellig' 'interact'\n",
|
954
|
+
" 'introduct' 'intuit' 'invalu' 'inventori' 'invest' 'involv' 'issu' 'key'\n",
|
955
|
+
" 'knowledg' 'languag' 'larg' 'latenc' 'lead' 'learn' 'level' 'leverag'\n",
|
956
|
+
" 'like' 'local' 'machin' 'mak' 'make' 'manag' 'mani' 'manner' 'manual'\n",
|
957
|
+
" 'market' 'massiv' 'meaning' 'medic' 'medium' 'method' 'mimic' 'miss'\n",
|
958
|
+
" 'mitig' 'ml' 'model' 'monitor' 'multipl' 'natur' 'navig' 'network'\n",
|
959
|
+
" 'neural' 'new' 'nlp' 'note' 'numer' 'object' 'often' 'one' 'oper' 'optim'\n",
|
960
|
+
" 'organ' 'outcom' 'outlier' 'paper' 'paramet' 'particularli' 'patient'\n",
|
961
|
+
" 'pattern' 'perform' 'perpetu' 'person' 'plan' 'platform' 'portfolio'\n",
|
962
|
+
" 'portion' 'potenti' 'pow' 'power' 'predict' 'prepar' 'present' 'primari'\n",
|
963
|
+
" 'process' 'profound' 'project' 'provid' 'quick' 'quickli' 'rais' 'reach'\n",
|
964
|
+
" 'real' 'recent' 'recognit' 'recommend' 'reduc' 'regress' 'regul'\n",
|
965
|
+
" 'relationship' 'relev' 'reli' 'repres' 'represent' 'requir' 'research'\n",
|
966
|
+
" 'respect' 'respons' 'retail' 'revolution' 'right' 'risk' 'safeti' 'sale'\n",
|
967
|
+
" 'scenario' 'scienc' 'scientist' 'second' 'sector' 'seek' 'sensor'\n",
|
968
|
+
" 'sentiment' 'server' 'signific' 'significantli' 'similarli' 'social'\n",
|
969
|
+
" 'societ' 'sophist' 'sourc' 'speed' 'spent' 'split' 'stakehold' 'stay'\n",
|
970
|
+
" 'step' 'strategi' 'stream' 'structur' 'struggl' 'subfield' 'suitabl'\n",
|
971
|
+
" 'summar' 'suppli' 'support' 'synergi' 'system' 'task' 'techniqu'\n",
|
972
|
+
" 'technolog' 'text' 'tim' 'time' 'today' 'togeth' 'tool' 'trade' 'tradit'\n",
|
973
|
+
" 'train' 'transform' 'transpar' 'treatment' 'tree' 'trend' 'trigger'\n",
|
974
|
+
" 'uncov' 'understand' 'unfair' 'unpreced' 'unstructur' 'use' 'user' 'valu'\n",
|
975
|
+
" 'valuabl' 'variou' 'vast' 'vehicl' 'video' 'vision' 'visual' 'vital'\n",
|
976
|
+
" 'way' 'within' 'without' 'workflow' 'world' 'would' 'year']\n",
|
977
|
+
"\n",
|
978
|
+
"Bag of Words (Vectorization):\n",
|
979
|
+
" [[ 2 1 2 2 1 3 2 1 3 1 7 1 34 1 9 3 2 2 8 6 8 1 3 1\n",
|
980
|
+
" 3 1 1 5 2 2 1 1 1 2 1 1 4 1 1 1 6 1 1 1 1 1 4 1\n",
|
981
|
+
" 1 1 1 1 2 1 5 3 1 1 2 1 1 1 1 1 1 1 3 2 1 1 62 3\n",
|
982
|
+
" 9 4 1 1 2 2 1 3 2 1 1 1 1 1 1 2 7 1 1 3 3 2 1 8\n",
|
983
|
+
" 5 4 1 1 1 2 1 5 1 1 3 1 1 2 4 3 1 1 1 2 1 2 2 1\n",
|
984
|
+
" 3 2 1 1 2 1 1 1 5 1 1 1 1 1 1 4 1 1 2 1 4 5 1 1\n",
|
985
|
+
" 4 1 1 1 4 1 1 1 1 3 4 2 4 1 1 5 3 1 1 1 2 2 2 1\n",
|
986
|
+
" 2 1 5 1 1 3 5 2 2 3 1 3 4 5 2 1 1 2 3 1 1 1 2 1\n",
|
987
|
+
" 1 1 1 1 5 2 1 4 1 1 1 1 6 1 2 1 2 1 1 4 3 3 1 1\n",
|
988
|
+
" 1 3 2 2 1 1 2 1 1 1 2 2 2 3 9 4 2 1 21 1 1 2 1 1\n",
|
989
|
+
" 1 1 7 1 1 1 3 1 1 1 1 1 1 1 2 2 1 2 1 1 1 2 1 1\n",
|
990
|
+
" 2 15 1 1 1 1 1 2 1 3 2 1 2 1 1 2 1 1 1 1 1 1 2 3\n",
|
991
|
+
" 1 1 1 1 1 1 2 1 3 3 4 1 3 7 3 2 1 2 1 1 2 7 1 1\n",
|
992
|
+
" 1 5 1 1 2 1 1 2 2 1 2 3 2 2 2 1 2 7 1 1 1 1 1 1\n",
|
993
|
+
" 1 1]]\n",
|
994
|
+
"\n",
|
995
|
+
"TensorFlow Word Index (Vocabulary): {'data': 1, 'ai': 2, 'process': 3, 'scienc': 4, 'algorithm': 5, 'predict': 6, 'decis': 7, 'analysi': 8, 'analyz': 9, 'enabl': 10, 'advanc': 11, 'transform': 12, 'driven': 13, 'real': 14, 'tim': 15, 'visual': 16, 'capabl': 17, 'nlp': 18, 'analyt': 19, 'intellig': 20, 'enhanc': 21, 'gener': 22, 'learn': 23, 'identifi': 24, 'trend': 25, 'autom': 26, 'languag': 27, 'comput': 28, 'make': 29, 'model': 30, 'ethic': 31, 'impact': 32, 'healthcar': 33, 'human': 34, 'extract': 35, 'decision': 36, 'mak': 37, 'inform': 38, 'clean': 39, 'prepar': 40, 'improv': 41, 'techniqu': 42, 'natur': 43, 'optim': 44, 'insight': 45, 'ensur': 46, 'bia': 47, 'artifici': 48, 'lead': 49, 'signific': 50, 'focu': 51, 'power': 52, 'effici': 53, 'particularli': 54, 'machin': 55, 'dataset': 56, 'detect': 57, 'valuabl': 58, 'market': 59, 'crucial': 60, 'time': 61, 'task': 62, 'exampl': 63, 'text': 64, 'reduc': 65, 'allow': 66, 'organ': 67, 'outcom': 68, 'industri': 69, 'like': 70, 'applic': 71, 'stream': 72, 'edg': 73, 'action': 74, 'facilit': 75, 'interact': 76, 'system': 77, 'address': 78, 'concern': 79, 'numer': 80, 'field': 81, 'financ': 82, 'abil': 83, 'drive': 84, 'innov': 85, 'explor': 86, 'highlight': 87, 'key': 88, 'often': 89, 'vast': 90, 'amount': 91, 'today': 92, 'involv': 93, 'accuraci': 94, 'pattern': 95, 'manual': 96, 'patient': 97, 'person': 98, 'fraudul': 99, 'activ': 100, 'invest': 101, 'portion': 102, 'significantli': 103, 'vision': 104, 'automat': 105, 'valu': 106, 'unstructur': 107, 'effort': 108, 'requir': 109, 'level': 110, 'forecast': 111, 'develop': 112, 'accur': 113, 'complex': 114, 'base': 115, 'continu': 116, 'inventori': 117, 'manag': 118, 'custom': 119, 'demand': 120, 'strategi': 121, 'autonom': 122, 'vehicl': 123, 'financi': 124, 'leverag': 125, 'social': 126, 'medium': 127, 'sourc': 128, 'essenti': 129, 'deploy': 130, 'scenario': 131, 'respons': 132, 'monitor': 133, 'understand': 134, 'provid': 135, 'tool': 136, 'also': 137, 'pow': 
138, 'present': 139, 'sentiment': 140, 'research': 141, 'support': 142, 'potenti': 143, 'risk': 144, 'train': 145, 'use': 146, 'variou': 147, 'introduct': 148, 'recent': 149, 'year': 150, 'converg': 151, 'revolution': 152, 'technolog': 153, 'mimic': 154, 'knowledg': 155, 'togeth': 156, 'form': 157, 'combin': 158, 'essay': 159, 'area': 160, 'one': 161, 'primari': 162, 'way': 163, 'tradit': 164, 'method': 165, 'struggl': 166, 'handl': 167, \"'s\": 168, 'digit': 169, 'age': 170, 'ml': 171, 'deep': 172, 'massiv': 173, 'unpreced': 174, 'speed': 175, 'instanc': 176, 'larg': 177, 'would': 178, 'imposs': 179, 'earli': 180, 'diagnosi': 181, 'treatment': 182, 'plan': 183, 'step': 184, 'workflow': 185, 'account': 186, 'spent': 187, 'project': 188, 'correct': 189, 'error': 190, 'inconsist': 191, 'miss': 192, 'relev': 193, 'structur': 194, 'format': 195, 'suitabl': 196, 'similarli': 197, 'imag': 198, 'video': 199, 'object': 200, 'meaning': 201, 'featur': 202, 'scientist': 203, 'higher': 204, 'core': 205, 'compon': 206, 'futur': 207, 'sophist': 208, 'relationship': 209, 'within': 210, 'regress': 211, 'tree': 212, 'neural': 213, 'network': 214, 'histor': 215, 'new': 216, 'becom': 217, 'avail': 218, 'retail': 219, 'mani': 220, 'trade': 221, 'cybersecur': 222, 'immedi': 223, 'sensor': 224, 'anomali': 225, 'trigger': 226, 'alert': 227, 'split': 228, 'second': 229, 'safeti': 230, 'navig': 231, 'bring': 232, 'closer': 233, 'latenc': 234, 'bandwidth': 235, 'devic': 236, 'local': 237, 'without': 238, 'reli': 239, 'central': 240, 'cloud': 241, 'server': 242, 'quick': 243, 'critic': 244, 'vital': 245, 'aspect': 246, 'stakehold': 247, 'graphic': 248, 'represent': 249, 'deeper': 250, 'intuit': 251, 'characterist': 252, 'outlier': 253, 'creat': 254, 'dashboard': 255, 'user': 256, 'dynam': 257, 'adjust': 258, 'paramet': 259, 'filter': 260, 'uncov': 261, 'hidden': 262, 'platform': 263, 'busi': 264, 'sale': 265, 'chart': 266, 'graph': 267, 'execut': 268, 'quickli': 269, 'subfield': 270, 
'constitut': 271, 'perform': 272, 'entiti': 273, 'recognit': 274, 'summar': 275, 'invalu': 276, 'clinic': 277, 'note': 278, 'paper': 279, 'medic': 280, 'care': 281, 'fundament': 282, 'evalu': 283, 'multipl': 284, 'recommend': 285, 'cours': 286, 'suppli': 287, 'chain': 288, 'fluctuat': 289, 'disrupt': 290, 'sector': 291, 'ass': 292, 'credit': 293, 'portfolio': 294, 'oper': 295, 'brought': 296, 'benefit': 297, 'rais': 298, 'import': 299, 'inadvert': 300, 'perpetu': 301, 'unfair': 302, 'discriminatori': 303, 'issu': 304, 'mitig': 305, 'includ': 306, 'fairness': 307, 'awar': 308, 'divers': 309, 'repres': 310, 'implement': 311, 'transpar': 312, 'explain': 313, 'addit': 314, 'guidelin': 315, 'regul': 316, 'establish': 317, 'govern': 318, 'design': 319, 'manner': 320, 'respect': 321, 'right': 322, 'societ': 323, 'conclus': 324, 'profound': 325, 'far': 326, 'reach': 327, 'evolv': 328, 'integr': 329, 'across': 330, 'embrac': 331, 'synergi': 332, 'seek': 333, 'har': 334, 'full': 335, 'stay': 336, 'competit': 337, 'increasingli': 338, 'world': 339}\n",
|
996
|
+
"\n",
|
997
|
+
"Bag of Words (One-Hot Encoding) from TensorFlow:\n",
|
998
|
+
" [[ 0. 62. 34. 21. 15. 9. 9. 9. 8. 8. 8. 7. 7. 7. 7. 7. 7. 6.\n",
|
999
|
+
" 6. 6. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 5. 4. 4. 4. 4.\n",
|
1000
|
+
" 4. 4. 4. 4. 4. 4. 4. 4. 4. 4. 4. 4. 3. 3. 3. 3. 3. 3.\n",
|
1001
|
+
" 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.\n",
|
1002
|
+
" 3. 3. 3. 3. 3. 3. 3. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
|
1003
|
+
" 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
|
1004
|
+
" 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
|
1005
|
+
" 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
|
1006
|
+
" 2. 2. 2. 2. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1007
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1008
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1009
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1010
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1011
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1012
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1013
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1014
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1015
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
|
1016
|
+
" 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
|
1017
|
+
"\n",
|
1018
|
+
"TF-IDF Representation from TensorFlow:\n",
|
1019
|
+
" [[0. 2.0788741 1.83528126 1.63991273 1.50348498 1.29636301\n",
|
1020
|
+
" 1.29636301 1.29636301 1.2486061 1.2486061 1.2486061 1.19446378\n",
|
1021
|
+
" 1.19446378 1.19446378 1.19446378 1.19446378 1.19446378 1.13196106\n",
|
1022
|
+
" 1.13196106 1.13196106 1.05803603 1.05803603 1.05803603 1.05803603\n",
|
1023
|
+
" 1.05803603 1.05803603 1.05803603 1.05803603 1.05803603 1.05803603\n",
|
1024
|
+
" 1.05803603 1.05803603 0.9675591 0.9675591 0.9675591 0.9675591\n",
|
1025
|
+
" 0.9675591 0.9675591 0.9675591 0.9675591 0.9675591 0.9675591\n",
|
1026
|
+
" 0.9675591 0.9675591 0.9675591 0.9675591 0.9675591 0.9675591\n",
|
1027
|
+
" 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406\n",
|
1028
|
+
" 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406\n",
|
1029
|
+
" 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406\n",
|
1030
|
+
" 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406\n",
|
1031
|
+
" 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406 0.85091406\n",
|
1032
|
+
" 0.85091406 0.85091406 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1033
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1034
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1035
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1036
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1037
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1038
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1039
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1040
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1041
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1042
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121 0.6865121\n",
|
1043
|
+
" 0.6865121 0.6865121 0.6865121 0.6865121 0.40546511 0.40546511\n",
|
1044
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1045
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1046
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1047
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1048
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1049
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1050
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1051
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1052
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1053
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1054
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1055
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1056
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1057
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1058
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1059
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1060
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1061
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1062
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1063
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1064
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1065
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1066
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1067
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1068
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1069
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1070
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1071
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1072
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1073
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1074
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511 0.40546511\n",
|
1075
|
+
" 0.40546511 0.40546511 0.40546511 0.40546511]]\n"
|
1076
|
+
]
|
1077
|
+
}
|
1078
|
+
],
|
1079
|
+
"source": [
|
1080
|
+
"# Import required libraries\n",
|
1081
|
+
"import nltk\n",
|
1082
|
+
"from nltk.corpus import stopwords\n",
|
1083
|
+
"from nltk.tokenize import word_tokenize\n",
|
1084
|
+
"from nltk.stem import WordNetLemmatizer, PorterStemmer\n",
|
1085
|
+
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
1086
|
+
"import tensorflow as tf\n",
|
1087
|
+
"import string\n",
|
1088
|
+
"\n",
|
1089
|
+
"# Download required NLTK resources\n",
|
1090
|
+
"'''nltk.download('punkt')\n",
|
1091
|
+
"nltk.download('stopwords')\n",
|
1092
|
+
"nltk.download('wordnet')\n",
|
1093
|
+
"'''\n",
|
1094
|
+
"# Read the content of the file\n",
|
1095
|
+
"file_path = 'text3.txt'\n",
|
1096
|
+
"with open(file_path, 'r') as file:\n",
|
1097
|
+
" text_data = file.read()\n",
|
1098
|
+
"\n",
|
1099
|
+
"# Initialize the stopwords, lemmatizer, and stemmer\n",
|
1100
|
+
"stop_words = set(stopwords.words('english'))\n",
|
1101
|
+
"lemmatizer = WordNetLemmatizer()\n",
|
1102
|
+
"stemmer = PorterStemmer()\n",
|
1103
|
+
"\n",
|
1104
|
+
"# Function to preprocess text data (Tokenization, Lemmatization, Stemming, Stopwords Removal)\n",
|
1105
|
+
"def preprocess_text(text):\n",
|
1106
|
+
" # Tokenize the text\n",
|
1107
|
+
" tokens = word_tokenize(text.lower())\n",
|
1108
|
+
" \n",
|
1109
|
+
" # Remove punctuation and stopwords, and perform lemmatization and stemming\n",
|
1110
|
+
" processed_tokens = []\n",
|
1111
|
+
" for word in tokens:\n",
|
1112
|
+
" if word not in stop_words and word not in string.punctuation:\n",
|
1113
|
+
" lemmatized_word = lemmatizer.lemmatize(word) # Lemmatization\n",
|
1114
|
+
" stemmed_word = stemmer.stem(lemmatized_word) # Stemming\n",
|
1115
|
+
" processed_tokens.append(stemmed_word)\n",
|
1116
|
+
" \n",
|
1117
|
+
" # Join tokens back into a single string\n",
|
1118
|
+
" return ' '.join(processed_tokens)\n",
|
1119
|
+
"\n",
|
1120
|
+
"# Preprocess the text data\n",
|
1121
|
+
"preprocessed_text = preprocess_text(text_data)\n",
|
1122
|
+
"\n",
|
1123
|
+
"# =======================\n",
|
1124
|
+
"# BAG OF WORDS (BoW) PART\n",
|
1125
|
+
"# =======================\n",
|
1126
|
+
"\n",
|
1127
|
+
"# Initialize the CountVectorizer (Bag of Words model)\n",
|
1128
|
+
"vectorizer = CountVectorizer()\n",
|
1129
|
+
"\n",
|
1130
|
+
"# Fit and transform the preprocessed text data to create the BoW model\n",
|
1131
|
+
"X = vectorizer.fit_transform([preprocessed_text])\n",
|
1132
|
+
"\n",
|
1133
|
+
"# Extract the vocabulary (feature names, ordered by their column index in the BoW matrix)\n",
|
1134
|
+
"vocabulary = vectorizer.get_feature_names_out()\n",
|
1135
|
+
"\n",
|
1136
|
+
"# Convert the BoW model to an array for easy viewing\n",
|
1137
|
+
"bow_array = X.toarray()\n",
|
1138
|
+
"\n",
|
1139
|
+
"# Print the BoW results\n",
|
1140
|
+
"print(\"Bag of Words (BoW) Vocabulary:\\n\", vocabulary)\n",
|
1141
|
+
"print(\"\\nBag of Words (Vectorization):\\n\", bow_array)\n",
|
1142
|
+
"\n",
|
1143
|
+
"# ============================\n",
|
1144
|
+
"# TENSORFLOW TF-IDF PART STARTS\n",
|
1145
|
+
"# ============================\n",
|
1146
|
+
"\n",
|
1147
|
+
"# Initialize the TensorFlow Tokenizer\n",
|
1148
|
+
"tokenizer = tf.keras.preprocessing.text.Tokenizer()\n",
|
1149
|
+
"\n",
|
1150
|
+
"# Fit tokenizer on the preprocessed text\n",
|
1151
|
+
"tokenizer.fit_on_texts([preprocessed_text])\n",
|
1152
|
+
"\n",
|
1153
|
+
"# Convert text to sequences of integers (word indices)\n",
|
1154
|
+
"sequences = tokenizer.texts_to_sequences([preprocessed_text])\n",
|
1155
|
+
"\n",
|
1156
|
+
"# Get the word index (vocabulary mapping)\n",
|
1157
|
+
"word_index = tokenizer.word_index\n",
|
1158
|
+
"\n",
|
1159
|
+
"# Convert the text to a word-count matrix (Bag of Words equivalent); mode='count' yields per-word counts, not one-hot flags\n",
|
1160
|
+
"one_hot_results = tokenizer.texts_to_matrix([preprocessed_text], mode='count')\n",
|
1161
|
+
"\n",
|
1162
|
+
"# Convert the text to a TF-IDF matrix\n",
|
1163
|
+
"tfidf_results = tokenizer.texts_to_matrix([preprocessed_text], mode='tfidf')\n",
|
1164
|
+
"\n",
|
1165
|
+
"# ====================\n",
|
1166
|
+
"# OUTPUT THE RESULTS\n",
|
1167
|
+
"# ====================\n",
|
1168
|
+
"\n",
|
1169
|
+
"# Print TensorFlow's vocabulary and TF-IDF results\n",
|
1170
|
+
"print(\"\\nTensorFlow Word Index (Vocabulary):\", word_index)\n",
|
1171
|
+
"print(\"\\nBag of Words (One-Hot Encoding) from TensorFlow:\\n\", one_hot_results)\n",
|
1172
|
+
"print(\"\\nTF-IDF Representation from TensorFlow:\\n\", tfidf_results)\n"
|
1173
|
+
]
|
1174
|
+
},
|
1175
|
+
{
|
1176
|
+
"cell_type": "code",
|
1177
|
+
"execution_count": 16,
|
1178
|
+
"id": "5027e631-a994-4ae8-b76e-490106ed0f4b",
|
1179
|
+
"metadata": {},
|
1180
|
+
"outputs": [
|
1181
|
+
{
|
1182
|
+
"name": "stderr",
|
1183
|
+
"output_type": "stream",
|
1184
|
+
"text": [
|
1185
|
+
"C:\\Users\\admin\\anaconda3\\Lib\\site-packages\\keras\\src\\layers\\core\\input_layer.py:25: UserWarning: Argument `input_shape` is deprecated. Use `shape` instead.\n",
|
1186
|
+
" warnings.warn(\n"
|
1187
|
+
]
|
1188
|
+
},
|
1189
|
+
{
|
1190
|
+
"data": {
|
1191
|
+
"text/html": [
|
1192
|
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"sequential_6\"</span>\n",
|
1193
|
+
"</pre>\n"
|
1194
|
+
],
|
1195
|
+
"text/plain": [
|
1196
|
+
"\u001b[1mModel: \"sequential_6\"\u001b[0m\n"
|
1197
|
+
]
|
1198
|
+
},
|
1199
|
+
"metadata": {},
|
1200
|
+
"output_type": "display_data"
|
1201
|
+
},
|
1202
|
+
{
|
1203
|
+
"data": {
|
1204
|
+
"text/html": [
|
1205
|
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
|
1206
|
+
"┃<span style=\"font-weight: bold\"> Layer (type) </span>┃<span style=\"font-weight: bold\"> Output Shape </span>┃<span style=\"font-weight: bold\"> Param # </span>┃\n",
|
1207
|
+
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
|
1208
|
+
"│ dense_12 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">64</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">21,824</span> │\n",
|
1209
|
+
"├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n",
|
1210
|
+
"│ dense_13 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">32</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">2,080</span> │\n",
|
1211
|
+
"├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n",
|
1212
|
+
"│ dense_14 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Dense</span>) │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">1</span>) │ <span style=\"color: #00af00; text-decoration-color: #00af00\">33</span> │\n",
|
1213
|
+
"└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘\n",
|
1214
|
+
"</pre>\n"
|
1215
|
+
],
|
1216
|
+
"text/plain": [
|
1217
|
+
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n",
|
1218
|
+
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
|
1219
|
+
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n",
|
1220
|
+
"│ dense_12 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m21,824\u001b[0m │\n",
|
1221
|
+
"├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n",
|
1222
|
+
"│ dense_13 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m2,080\u001b[0m │\n",
|
1223
|
+
"├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n",
|
1224
|
+
"│ dense_14 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m33\u001b[0m │\n",
|
1225
|
+
"└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘\n"
|
1226
|
+
]
|
1227
|
+
},
|
1228
|
+
"metadata": {},
|
1229
|
+
"output_type": "display_data"
|
1230
|
+
},
|
1231
|
+
{
|
1232
|
+
"data": {
|
1233
|
+
"text/html": [
|
1234
|
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">23,937</span> (93.50 KB)\n",
|
1235
|
+
"</pre>\n"
|
1236
|
+
],
|
1237
|
+
"text/plain": [
|
1238
|
+
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m23,937\u001b[0m (93.50 KB)\n"
|
1239
|
+
]
|
1240
|
+
},
|
1241
|
+
"metadata": {},
|
1242
|
+
"output_type": "display_data"
|
1243
|
+
},
|
1244
|
+
{
|
1245
|
+
"data": {
|
1246
|
+
"text/html": [
|
1247
|
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">23,937</span> (93.50 KB)\n",
|
1248
|
+
"</pre>\n"
|
1249
|
+
],
|
1250
|
+
"text/plain": [
|
1251
|
+
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m23,937\u001b[0m (93.50 KB)\n"
|
1252
|
+
]
|
1253
|
+
},
|
1254
|
+
"metadata": {},
|
1255
|
+
"output_type": "display_data"
|
1256
|
+
},
|
1257
|
+
{
|
1258
|
+
"data": {
|
1259
|
+
"text/html": [
|
1260
|
+
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
|
1261
|
+
"</pre>\n"
|
1262
|
+
],
|
1263
|
+
"text/plain": [
|
1264
|
+
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
|
1265
|
+
]
|
1266
|
+
},
|
1267
|
+
"metadata": {},
|
1268
|
+
"output_type": "display_data"
|
1269
|
+
},
|
1270
|
+
{
|
1271
|
+
"name": "stdout",
|
1272
|
+
"output_type": "stream",
|
1273
|
+
"text": [
|
1274
|
+
"Epoch 1/10\n",
|
1275
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 826ms/step - accuracy: 1.0000 - loss: 0.4962\n",
|
1276
|
+
"Epoch 2/10\n",
|
1277
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step - accuracy: 1.0000 - loss: 0.3755\n",
|
1278
|
+
"Epoch 3/10\n",
|
1279
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 1.0000 - loss: 0.2739\n",
|
1280
|
+
"Epoch 4/10\n",
|
1281
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 21ms/step - accuracy: 1.0000 - loss: 0.1912\n",
|
1282
|
+
"Epoch 5/10\n",
|
1283
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 1.0000 - loss: 0.1314\n",
|
1284
|
+
"Epoch 6/10\n",
|
1285
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 21ms/step - accuracy: 1.0000 - loss: 0.0946\n",
|
1286
|
+
"Epoch 7/10\n",
|
1287
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 25ms/step - accuracy: 1.0000 - loss: 0.0697\n",
|
1288
|
+
"Epoch 8/10\n",
|
1289
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 22ms/step - accuracy: 1.0000 - loss: 0.0509\n",
|
1290
|
+
"Epoch 9/10\n",
|
1291
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 1.0000 - loss: 0.0381\n",
|
1292
|
+
"Epoch 10/10\n",
|
1293
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 23ms/step - accuracy: 1.0000 - loss: 0.0285\n",
|
1294
|
+
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 110ms/step - accuracy: 1.0000 - loss: 0.0216\n",
|
1295
|
+
"\n",
|
1296
|
+
"Final loss: 0.021618764847517014\n",
|
1297
|
+
"Final accuracy: 1.0\n"
|
1298
|
+
]
|
1299
|
+
}
|
1300
|
+
],
|
1301
|
+
"source": [
|
1302
|
+
"# Import required libraries\n",
|
1303
|
+
"import nltk\n",
|
1304
|
+
"import numpy as np\n",
|
1305
|
+
"from nltk.corpus import stopwords\n",
|
1306
|
+
"from nltk.tokenize import word_tokenize\n",
|
1307
|
+
"from nltk.stem import WordNetLemmatizer, PorterStemmer\n",
|
1308
|
+
"from sklearn.feature_extraction.text import CountVectorizer\n",
|
1309
|
+
"import tensorflow as tf\n",
|
1310
|
+
"import string\n",
|
1311
|
+
"\n",
|
1312
|
+
"# Download required NLTK resources\n",
|
1313
|
+
"#nltk.download('punkt')\n",
|
1314
|
+
"#nltk.download('stopwords')\n",
|
1315
|
+
"#nltk.download('wordnet')\n",
|
1316
|
+
"\n",
|
1317
|
+
"# Read the content of the file\n",
|
1318
|
+
"file_path = 'text3.txt'\n",
|
1319
|
+
"with open(file_path, 'r') as file:\n",
|
1320
|
+
" text_data = file.read()\n",
|
1321
|
+
"\n",
|
1322
|
+
"# Initialize the stopwords, lemmatizer, and stemmer\n",
|
1323
|
+
"stop_words = set(stopwords.words('english'))\n",
|
1324
|
+
"lemmatizer = WordNetLemmatizer()\n",
|
1325
|
+
"stemmer = PorterStemmer()\n",
|
1326
|
+
"\n",
|
1327
|
+
"# Function to preprocess text data (Tokenization, Lemmatization, Stemming, Stopwords Removal)\n",
|
1328
|
+
"def preprocess_text(text):\n",
|
1329
|
+
" # Tokenize the text\n",
|
1330
|
+
" tokens = word_tokenize(text.lower())\n",
|
1331
|
+
" \n",
|
1332
|
+
" # Remove punctuation and stopwords, and perform lemmatization and stemming\n",
|
1333
|
+
" processed_tokens = []\n",
|
1334
|
+
" for word in tokens:\n",
|
1335
|
+
" if word not in stop_words and word not in string.punctuation:\n",
|
1336
|
+
" lemmatized_word = lemmatizer.lemmatize(word) # Lemmatization\n",
|
1337
|
+
" stemmed_word = stemmer.stem(lemmatized_word) # Stemming\n",
|
1338
|
+
" processed_tokens.append(stemmed_word)\n",
|
1339
|
+
" \n",
|
1340
|
+
" # Join tokens back into a single string\n",
|
1341
|
+
" return ' '.join(processed_tokens)\n",
|
1342
|
+
"\n",
|
1343
|
+
"# Preprocess the text data\n",
|
1344
|
+
"preprocessed_text = preprocess_text(text_data)\n",
|
1345
|
+
"\n",
|
1346
|
+
"# ============================\n",
|
1347
|
+
"# TENSORFLOW TF-IDF PART STARTS\n",
|
1348
|
+
"# ============================\n",
|
1349
|
+
"\n",
|
1350
|
+
"# Initialize the TensorFlow Tokenizer\n",
|
1351
|
+
"tokenizer = tf.keras.preprocessing.text.Tokenizer()\n",
|
1352
|
+
"\n",
|
1353
|
+
"# Fit tokenizer on the preprocessed text\n",
|
1354
|
+
"tokenizer.fit_on_texts([preprocessed_text])\n",
|
1355
|
+
"\n",
|
1356
|
+
"# Convert text to TF-IDF form\n",
|
1357
|
+
"tfidf_results = tokenizer.texts_to_matrix([preprocessed_text], mode='tfidf')\n",
|
1358
|
+
"\n",
|
1359
|
+
"# ============================\n",
|
1360
|
+
"# DEFINE ANN MODEL\n",
|
1361
|
+
"# ============================\n",
|
1362
|
+
"\n",
|
1363
|
+
"# For demo purposes, we create a mock label (you can replace it with your real labels)\n",
|
1364
|
+
"labels = np.array([1]) # Assuming binary classification (0 or 1), change based on your data\n",
|
1365
|
+
"\n",
|
1366
|
+
"# Define the ANN model\n",
|
1367
|
+
"model = tf.keras.Sequential([\n",
|
1368
|
+
" tf.keras.layers.InputLayer(input_shape=(tfidf_results.shape[1],)), # Input layer (TF-IDF input size)\n",
|
1369
|
+
" tf.keras.layers.Dense(64, activation='relu'), # First hidden layer with 64 neurons\n",
|
1370
|
+
" tf.keras.layers.Dense(32, activation='relu'), # Second hidden layer with 32 neurons\n",
|
1371
|
+
" tf.keras.layers.Dense(1, activation='sigmoid') # Output layer (binary classification)\n",
|
1372
|
+
"])\n",
|
1373
|
+
"\n",
|
1374
|
+
"# Compile the model\n",
|
1375
|
+
"model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
|
1376
|
+
"\n",
|
1377
|
+
"# Print model summary\n",
|
1378
|
+
"model.summary()\n",
|
1379
|
+
"\n",
|
1380
|
+
"# Train the model on the TF-IDF data\n",
|
1381
|
+
"history = model.fit(tfidf_results, labels, epochs=10, verbose=1)\n",
|
1382
|
+
"\n",
|
1383
|
+
"# ====================\n",
|
1384
|
+
"# OUTPUT OF ANN TRAINING\n",
|
1385
|
+
"# ====================\n",
|
1386
|
+
"# Evaluate model performance\n",
|
1387
|
+
"loss, accuracy = model.evaluate(tfidf_results, labels, verbose=1)\n",
|
1388
|
+
"print(f\"\\nFinal loss: {loss}\")\n",
|
1389
|
+
"print(f\"Final accuracy: {accuracy}\")\n"
|
1390
|
+
]
|
1391
|
+
},
|
1392
|
+
{
|
1393
|
+
"cell_type": "code",
|
1394
|
+
"execution_count": null,
|
1395
|
+
"id": "66bdb47b-33b3-453c-994f-a7f49c0c21f2",
|
1396
|
+
"metadata": {},
|
1397
|
+
"outputs": [],
|
1398
|
+
"source": []
|
1399
|
+
}
|
1400
|
+
],
|
1401
|
+
"metadata": {
|
1402
|
+
"kernelspec": {
|
1403
|
+
"display_name": "Python 3 (ipykernel)",
|
1404
|
+
"language": "python",
|
1405
|
+
"name": "python3"
|
1406
|
+
},
|
1407
|
+
"language_info": {
|
1408
|
+
"codemirror_mode": {
|
1409
|
+
"name": "ipython",
|
1410
|
+
"version": 3
|
1411
|
+
},
|
1412
|
+
"file_extension": ".py",
|
1413
|
+
"mimetype": "text/x-python",
|
1414
|
+
"name": "python",
|
1415
|
+
"nbconvert_exporter": "python",
|
1416
|
+
"pygments_lexer": "ipython3",
|
1417
|
+
"version": "3.11.7"
|
1418
|
+
}
|
1419
|
+
},
|
1420
|
+
"nbformat": 4,
|
1421
|
+
"nbformat_minor": 5
|
1422
|
+
}
|