package-loader 1.0.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package_loader-1.0.0/PKG-INFO +24 -0
  2. package_loader-1.0.0/mycodes/__init__.py +9 -0
  3. package_loader-1.0.0/mycodes/cli.py +139 -0
  4. package_loader-1.0.0/mycodes/dl/__init__.py +7 -0
  5. package_loader-1.0.0/mycodes/dl/boston/__init__.py +1 -0
  6. package_loader-1.0.0/mycodes/dl/boston/dataset.py +40 -0
  7. package_loader-1.0.0/mycodes/dl/boston/model.py +74 -0
  8. package_loader-1.0.0/mycodes/dl/imdb/__init__.py +1 -0
  9. package_loader-1.0.0/mycodes/dl/imdb/dataset.py +40 -0
  10. package_loader-1.0.0/mycodes/dl/imdb/model.py +107 -0
  11. package_loader-1.0.0/mycodes/dl/plant_disease/__init__.py +0 -0
  12. package_loader-1.0.0/mycodes/dl/plant_disease/dataset.py +37 -0
  13. package_loader-1.0.0/mycodes/dl/plant_disease/model.py +81 -0
  14. package_loader-1.0.0/mycodes/dl/stock/__init__.py +1 -0
  15. package_loader-1.0.0/mycodes/dl/stock/dataset.py +40 -0
  16. package_loader-1.0.0/mycodes/dl/stock/model.py +55 -0
  17. package_loader-1.0.0/mycodes/downloader.py +107 -0
  18. package_loader-1.0.0/mycodes/hpc/__init__.py +7 -0
  19. package_loader-1.0.0/mycodes/hpc/dataset.py +78 -0
  20. package_loader-1.0.0/mycodes/nlp/__init__.py +7 -0
  21. package_loader-1.0.0/mycodes/nlp/nlp1/__init__.py +0 -0
  22. package_loader-1.0.0/mycodes/nlp/nlp1/dataset.py +37 -0
  23. package_loader-1.0.0/mycodes/nlp/nlp1/model.py +75 -0
  24. package_loader-1.0.0/mycodes/nlp/nlp2/__init__.py +0 -0
  25. package_loader-1.0.0/mycodes/nlp/nlp2/dataset.py +40 -0
  26. package_loader-1.0.0/mycodes/nlp/nlp2/model.py +82 -0
  27. package_loader-1.0.0/mycodes/nlp/nlp3/__init__.py +0 -0
  28. package_loader-1.0.0/mycodes/nlp/nlp3/dataset.py +71 -0
  29. package_loader-1.0.0/mycodes/nlp/nlp3/model.py +114 -0
  30. package_loader-1.0.0/mycodes/nlp/nlp4/__init__.py +0 -0
  31. package_loader-1.0.0/mycodes/nlp/nlp4/dataset.py +37 -0
  32. package_loader-1.0.0/mycodes/nlp/nlp4/model.py +164 -0
  33. package_loader-1.0.0/mycodes/registry.py +209 -0
  34. package_loader-1.0.0/package_loader.egg-info/PKG-INFO +24 -0
  35. package_loader-1.0.0/package_loader.egg-info/SOURCES.txt +39 -0
  36. package_loader-1.0.0/package_loader.egg-info/dependency_links.txt +1 -0
  37. package_loader-1.0.0/package_loader.egg-info/entry_points.txt +2 -0
  38. package_loader-1.0.0/package_loader.egg-info/requires.txt +15 -0
  39. package_loader-1.0.0/package_loader.egg-info/top_level.txt +1 -0
  40. package_loader-1.0.0/setup.cfg +4 -0
  41. package_loader-1.0.0/setup.py +27 -0
@@ -0,0 +1,24 @@
+ Metadata-Version: 2.4
+ Name: package-loader
+ Version: 1.0.0
+ Summary: CLI tool to download DL, NLP, and HPC codes
+ Author: Jhony
+ Requires-Python: >=3.7
+ Requires-Dist: requests
+ Requires-Dist: numpy
+ Requires-Dist: pandas
+ Requires-Dist: matplotlib
+ Requires-Dist: scikit-learn
+ Requires-Dist: nltk
+ Provides-Extra: dl
+ Requires-Dist: torch; extra == "dl"
+ Requires-Dist: tensorflow; extra == "dl"
+ Provides-Extra: all
+ Requires-Dist: torch; extra == "all"
+ Requires-Dist: tensorflow; extra == "all"
+ Requires-Dist: gensim; extra == "all"
+ Dynamic: author
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
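The Provides-Extra entries make the heavy frameworks opt-in: a plain `pip install package-loader` pulls only the six base requirements, while `pip install "package-loader[dl]"` adds torch and tensorflow. A minimal sketch of inspecting this metadata from Python, assuming the package is installed locally (importlib.metadata is standard library since Python 3.8):

    from importlib.metadata import requires

    # Lists every Requires-Dist entry, with extras kept as environment
    # markers, e.g. 'torch; extra == "dl"'.
    for req in requires("package-loader"):
        print(req)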
@@ -0,0 +1,9 @@
+ """
+ MyCode CLI - Download DL, NLP, and HPC practicals
+ """
+
+ from .cli import main
+ from .downloader import get_item
+
+ __all__ = ["main", "get_item"]
+ __version__ = "1.0.0"
@@ -0,0 +1,139 @@
+ # cli.py
+ """
+ Command-line interface for dataset downloads.
+
+ Usage:
+     mycodes get <dataset_name>    Download a specific dataset
+     mycodes list                  List all available datasets
+     mycodes list <category>       List datasets in a category (dl, nlp, hpc)
+     mycodes help                  Show help message
+ """
+
+ import sys
+ from .downloader import (
+     get_item,
+     list_all_datasets,
+     get_category_datasets,
+     DownloadError,
+     DatasetNotFoundError,
+     DownloadFailedError,
+ )
+ from .registry import initialize_registry
+
+
+ def show_help():
+     """Display help information."""
+     help_text = """
+ MyCode CLI - Download DL, NLP, and HPC datasets
+
+ USAGE:
+     mycodes get <name>         Download a dataset
+     mycodes list               List all datasets
+     mycodes list <category>    List datasets in a category
+     mycodes help               Show this help message
+
+ EXAMPLES:
+     mycodes get boston         Download Boston housing dataset
+     mycodes get nlp2           Download NLP2 car dataset
+     mycodes get 1bfs           Download BFS/DFS HPC code
+
+     mycodes list               Show all available datasets
+     mycodes list dl            Show all deep learning datasets
+     mycodes list nlp           Show all NLP datasets
+     mycodes list hpc           Show all HPC datasets
+
+ CATEGORIES:
+     dl     Deep Learning datasets
+     nlp    Natural Language Processing datasets
+     hpc    High Performance Computing code files
+ """
+     print(help_text)
+
+
+ def show_list(category: str = None):
+     """Display available datasets."""
+     datasets = list_all_datasets()
+
+     if category:
+         if category not in datasets:
+             print(f"Error: Unknown category '{category}'")
+             print(f"Available categories: {', '.join(sorted(datasets.keys()))}")
+             return False
+
+         items = datasets[category]
+         print(f"\n{category.upper()} Datasets:")
+         print("-" * 60)
+         for name, desc in sorted(items.items()):
+             print(f"  {name:<20} - {desc}")
+         return True
+
+     # Show all categories
+     for cat in sorted(datasets.keys()):
+         items = datasets[cat]
+         print(f"\n{cat.upper()} Datasets:")
+         print("-" * 60)
+         for name, desc in sorted(items.items()):
+             print(f"  {name:<20} - {desc}")
+
+     return True
+
+
+ def main():
+     """Main CLI entry point."""
+     # Initialize registry on first run
+     initialize_registry()
+
+     if len(sys.argv) < 2:
+         print("Usage: mycodes <command> [args]")
+         print("Try 'mycodes help' for more information")
+         sys.exit(1)
+
+     command = sys.argv[1].lower()
+
+     try:
+         if command == "get":
+             if len(sys.argv) < 3:
+                 print("Error: 'get' command requires a dataset name")
+                 print("Usage: mycodes get <dataset_name>")
+                 print("Try 'mycodes list' to see available datasets")
+                 sys.exit(1)
+
+             dataset_name = sys.argv[2]
+             print(f"Downloading '{dataset_name}'...")
+
+             try:
+                 result = get_item(dataset_name)
+                 print(f"✓ Success: Downloaded to {result}")
+                 sys.exit(0)
+             except DatasetNotFoundError as e:
+                 print(f"✗ Error: {e}")
+                 sys.exit(1)
+             except DownloadFailedError as e:
+                 print(f"✗ Download failed: {e}")
+                 sys.exit(1)
+
+         elif command == "list":
+             category = sys.argv[2].lower() if len(sys.argv) > 2 else None
+             success = show_list(category)
+             sys.exit(0 if success else 1)
+
+         elif command == "help":
+             show_help()
+             sys.exit(0)
+
+         else:
+             print(f"Error: Unknown command '{command}'")
+             print("Available commands: get, list, help")
+             print("Try 'mycodes help' for more information")
+             sys.exit(1)
+
+     except DownloadError as e:
+         print(f"Download Error: {e}", file=sys.stderr)
+         sys.exit(1)
+     except Exception as e:
+         print(f"Unexpected error: {e}", file=sys.stderr)
+         sys.exit(1)
+
+
+ if __name__ == "__main__":
+     main()
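Since main() dispatches on sys.argv and always terminates via sys.exit, it can be smoke-tested without installing the console script. A minimal sketch, assuming the package is importable:

    import sys
    from contextlib import suppress
    from mycodes.cli import main

    # Simulate running `mycodes list dl` from a shell; swallow the
    # SystemExit that main() raises on completion.
    sys.argv = ["mycodes", "list", "dl"]
    with suppress(SystemExit):
        main()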
@@ -0,0 +1,7 @@
+ """Deep Learning practicals
+
+ This module provides lazy-loaded access to deep learning datasets.
+ Do NOT import models to avoid executing model logic at import time.
+ """
+
+ __all__ = []
@@ -0,0 +1 @@
+ from .dataset import download_data
@@ -0,0 +1,40 @@
+ """
+ Boston Housing Price Prediction Dataset downloader.
+
+ This module provides lazy-loaded dataset downloads without executing model logic.
+ No imports of heavy libraries happen at module level.
+ """
+
+ import os
+ import requests
+
+
+ URL = "https://raw.githubusercontent.com/Vishwajeet-Londhe/SPPU-CSE-SEM8-Codes/refs/heads/master/LP-V/DL/1.Boston%20housing%20price/1_boston_housing.csv"
+ PATH = os.path.join(os.getcwd(), "data", "boston.csv")
+
+
+ def download_data():
+     """
+     Download the Boston Housing Price dataset.
+
+     Returns:
+         str: Path to the downloaded dataset file
+
+     Raises:
+         RuntimeError: If the download fails
+     """
+     if os.path.exists(PATH):
+         print("Already exists")
+         return PATH
+
+     os.makedirs(os.path.dirname(PATH), exist_ok=True)
+
+     try:
+         r = requests.get(URL, timeout=60)  # timeout so a dead connection cannot hang forever
+         r.raise_for_status()
+         with open(PATH, "wb") as f:
+             f.write(r.content)
+         print(f"Downloaded to {PATH}")
+         return PATH
+     except requests.RequestException as e:
+         raise RuntimeError(f"Failed to download dataset: {e}") from e
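Each dataset module below follows this same pattern: a module-level URL/PATH pair and an idempotent download_data() that returns the local path. A minimal usage sketch (the column layout of the CSV comes from the model script, not this module):

    import pandas as pd
    from mycodes.dl.boston import download_data

    path = download_data()   # fetches to ./data/boston.csv; later calls return the cached path
    df = pd.read_csv(path)
    print(df.shape, list(df.columns)[:5])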
@@ -0,0 +1,74 @@
+ # Step 1: Import Libraries
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler
+
+ import tensorflow as tf
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Dense
+ # Step 2: Load Dataset
+ df = pd.read_csv("1_boston_housing.csv")
+
+ # Remove quotes from column names (important for this dataset)
+ df.columns = df.columns.str.replace('"', '')
+
+ # Display first 5 rows
+ df.head()
+ # Step 3: Split Features and Target
+ X = df.drop("MEDV", axis=1)  # Features
+ y = df["MEDV"]               # Target
+
+ print(X.shape, y.shape)
+ # Step 4: Train-Test Split
+ X_train, X_test, y_train, y_test = train_test_split(
+     X, y, test_size=0.2, random_state=42
+ )
+ # Step 5: Feature Scaling
+ scaler = StandardScaler()
+
+ X_train = scaler.fit_transform(X_train)
+ X_test = scaler.transform(X_test)
+ # Step 6: Build Model
+ model = Sequential()
+
+ # Single neuron → behaves like linear regression
+ model.add(Dense(1, input_shape=(X_train.shape[1],), activation='linear'))
+
+ model.compile(
+     optimizer='adam',
+     loss='mse',
+     metrics=['mae']
+ )
+
+ model.summary()
+ # Step 7: Train Model
+ history = model.fit(
+     X_train, y_train,
+     epochs=100,
+     batch_size=16,
+     validation_split=0.2,
+     verbose=1
+ )
+ # Step 8: Evaluate Model
+ loss, mae = model.evaluate(X_test, y_test)
+
+ print("Test Loss (MSE):", loss)
+ print("Test MAE:", mae)
+ # Step 9: Predictions
+ y_pred = model.predict(X_test)
+
+ # Compare actual vs predicted
+ for i in range(5):
+     print(f"Actual: {y_test.iloc[i]:.2f} | Predicted: {y_pred[i][0]:.2f}")
+ # Step 10: Plot Training Graphs
+ plt.plot(history.history['loss'], label='Training Loss')
+ plt.plot(history.history['val_loss'], label='Validation Loss')
+ plt.legend()
+ plt.title("Loss Graph")
+ plt.xlabel("Epochs")
+ plt.ylabel("Loss")
+ plt.show()
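With a single linear neuron, this network is ordinary linear regression fit by Adam. Note that any new sample must pass through the same StandardScaler before predict; a sketch, where x_new is a hypothetical raw feature row (a NumPy array with the same columns as X):

    # x_new: one unscaled row of features (hypothetical)
    x_new_scaled = scaler.transform(x_new.reshape(1, -1))
    print(model.predict(x_new_scaled)[0][0])   # predicted MEDV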
@@ -0,0 +1 @@
+ from .dataset import download_data
@@ -0,0 +1,40 @@
+ """
+ IMDB Movie Reviews Dataset downloader.
+
+ This module provides lazy-loaded dataset downloads without executing model logic.
+ No imports of heavy libraries happen at module level.
+ """
+
+ import os
+ import requests
+
+
+ URL = "https://github.com/Vishwajeet-Londhe/SPPU-CSE-SEM8-Codes/raw/refs/heads/master/LP-V/DL/2.IMDB/IMDB_Dataset.csv"
+ PATH = os.path.join(os.getcwd(), "data", "imdb.csv")
+
+
+ def download_data():
+     """
+     Download the IMDB Movie Reviews dataset.
+
+     Returns:
+         str: Path to the downloaded dataset file
+
+     Raises:
+         RuntimeError: If the download fails
+     """
+     if os.path.exists(PATH):
+         print("Already exists")
+         return PATH
+
+     os.makedirs(os.path.dirname(PATH), exist_ok=True)
+
+     try:
+         r = requests.get(URL, timeout=60)  # timeout so a dead connection cannot hang forever
+         r.raise_for_status()
+         with open(PATH, "wb") as f:
+             f.write(r.content)
+         print(f"Downloaded to {PATH}")
+         return PATH
+     except requests.RequestException as e:
+         raise RuntimeError(f"Failed to download dataset: {e}") from e
@@ -0,0 +1,107 @@
+ # =========================
+ # 1. IMPORT LIBRARIES
+ # =========================
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import re
+
+ from sklearn.model_selection import train_test_split
+ from sklearn.feature_extraction.text import TfidfVectorizer
+
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Dense
+ # =========================
+ # 2. LOAD DATASET (SAFE)
+ # =========================
+ df = pd.read_csv(
+     "IMDB_Dataset.csv",
+     encoding_errors='ignore',
+     on_bad_lines='skip',
+     engine='python'
+ )
+
+ print("Dataset Loaded ✅")
+ print(df.head())
+ print(df.columns)
+ df = df.dropna()
+ print(df['sentiment'].unique())   # raw labels, e.g. ['positive' 'negative']
+ print(df.isnull().sum())          # should be 0 after dropna
+ # =========================
+ # 3. FIX COLUMN NAMES
+ # =========================
+ df.columns = ['review', 'sentiment']
+ # =========================
+ # 4. CLEAN TEXT
+ # =========================
+ def clean_text(text):
+     text = str(text)
+     text = text.lower()
+     text = re.sub(r'<.*?>', '', text)       # remove HTML
+     text = re.sub(r'[^a-zA-Z ]', '', text)  # remove symbols
+     return text
+
+ df['review'] = df['review'].apply(clean_text)
+ # =========================
+ # 5. CONVERT LABELS
+ # =========================
+ df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})
+
+ print("\nSentiment Count:")
+ print(df['sentiment'].value_counts())
+ # =========================
+ # 6. TF-IDF VECTORIZATION
+ # =========================
+ vectorizer = TfidfVectorizer(max_features=5000)
+ X = vectorizer.fit_transform(df['review']).toarray()
+ y = df['sentiment']
+
+ # =========================
+ # 7. TRAIN TEST SPLIT
+ # =========================
+ X_train, X_test, y_train, y_test = train_test_split(
+     X, y, test_size=0.2, random_state=42
+ )
+
+ # =========================
+ # 8. BUILD MODEL
+ # =========================
+ model = Sequential()
+
+ model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
+ model.add(Dense(64, activation='relu'))
+ model.add(Dense(1, activation='sigmoid'))
+
+ model.compile(
+     optimizer='adam',
+     loss='binary_crossentropy',
+     metrics=['accuracy']
+ )
+
+ model.summary()
+
+ # =========================
+ # 9. TRAIN MODEL
+ # =========================
+ history = model.fit(
+     X_train, y_train,
+     epochs=10,
+     batch_size=32,
+     validation_data=(X_test, y_test)
+ )
+ # =========================
+ # 10. PLOT GRAPH
+ # =========================
+ plt.plot(history.history['accuracy'], label='Training Accuracy')
+ plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
+
+ plt.title("Accuracy Graph")
+ plt.xlabel("Epochs")
+ plt.ylabel("Accuracy")
+ plt.legend()
+ plt.show()
+ # =========================
+ # 11. FINAL ACCURACY
+ # =========================
+ loss, accuracy = model.evaluate(X_test, y_test)
+ print("\nFinal Accuracy:", accuracy)
@@ -0,0 +1,37 @@
+ """
+ Plant Disease Classification Dataset downloader.
+
+ This module provides lazy-loaded dataset downloads without executing model logic.
+ No imports of heavy libraries happen at module level.
+ """
+
+ import os
+ import requests
+
+
+ URL = "https://github.com/Vishwajeet-Londhe/SPPU-CSE-SEM8-Codes/raw/refs/heads/master/LP-V/DL/3.Plant_Disease/plant_disease_data.csv"
+ PATH = os.path.join(os.getcwd(), "data", "plant_disease.csv")
+
+
+ def download_data():
+     """
+     Download the plant disease classification dataset.
+
+     Returns:
+         str: Path to the downloaded dataset file
+     """
+     if os.path.exists(PATH):
+         print("Already exists")
+         return PATH
+
+     os.makedirs(os.path.dirname(PATH), exist_ok=True)
+
+     try:
+         r = requests.get(URL, timeout=60)  # timeout so a dead connection cannot hang forever
+         r.raise_for_status()
+         with open(PATH, "wb") as f:
+             f.write(r.content)
+         print(f"Downloaded to {PATH}")
+         return PATH
+     except requests.RequestException as e:
+         raise RuntimeError(f"Failed to download dataset: {e}") from e
@@ -0,0 +1,81 @@
+ import tensorflow as tf
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
+ from tensorflow.keras.preprocessing.image import ImageDataGenerator
+ import matplotlib.pyplot as plt
+ train_path = "Plant_Disease_Dataset/train"
+ valid_path = "Plant_Disease_Dataset/valid"
+ test_path = "Plant_Disease_Dataset/test"
+ train_datagen = ImageDataGenerator(
+     rescale=1./255,
+     rotation_range=20,
+     zoom_range=0.2,
+     horizontal_flip=True
+ )
+
+ valid_datagen = ImageDataGenerator(rescale=1./255)
+ test_datagen = ImageDataGenerator(rescale=1./255)
+ train_data = train_datagen.flow_from_directory(
+     train_path,
+     target_size=(128, 128),
+     batch_size=32,
+     class_mode='categorical'
+ )
+
+ valid_data = valid_datagen.flow_from_directory(
+     valid_path,
+     target_size=(128, 128),
+     batch_size=32,
+     class_mode='categorical'
+ )
+
+ test_data = test_datagen.flow_from_directory(
+     test_path,
+     target_size=(128, 128),
+     batch_size=32,
+     class_mode='categorical'
+ )
+ model = Sequential()
+
+ # Conv Layer 1
+ model.add(Conv2D(32, (3,3), activation='relu', input_shape=(128,128,3)))
+ model.add(MaxPooling2D(2,2))
+
+ # Conv Layer 2
+ model.add(Conv2D(64, (3,3), activation='relu'))
+ model.add(MaxPooling2D(2,2))
+
+ # Conv Layer 3
+ model.add(Conv2D(128, (3,3), activation='relu'))
+ model.add(MaxPooling2D(2,2))
+
+ # Flatten
+ model.add(Flatten())
+
+ # Fully Connected
+ model.add(Dense(128, activation='relu'))
+ model.add(Dropout(0.5))
+
+ # Output Layer
+ model.add(Dense(train_data.num_classes, activation='softmax'))
+
+ model.compile(
+     optimizer='adam',
+     loss='categorical_crossentropy',
+     metrics=['accuracy']
+ )
+
+ model.summary()
+ history = model.fit(
+     train_data,
+     epochs=3,
+     validation_data=valid_data
+ )
+ plt.plot(history.history['accuracy'], label='Training Accuracy')
+ plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
+
+ plt.title("CNN Accuracy Graph")
+ plt.xlabel("Epochs")
+ plt.ylabel("Accuracy")
+ plt.legend()
+ plt.show()
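flow_from_directory infers class labels from subdirectory names, so this script assumes a layout along these lines (the class folder names are illustrative, not shipped with the package):

    Plant_Disease_Dataset/
        train/
            <class_a>/  *.jpg
            <class_b>/  *.jpg
        valid/
            <class_a>/  ...
        test/
            <class_a>/  ...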
@@ -0,0 +1 @@
+ from .dataset import download_data
@@ -0,0 +1,40 @@
+ """
+ Google Stock Price Dataset downloader.
+
+ This module provides lazy-loaded dataset downloads without executing model logic.
+ No imports of heavy libraries happen at module level.
+ """
+
+ import os
+ import requests
+
+
+ URL = "https://raw.githubusercontent.com/Vishwajeet-Londhe/SPPU-CSE-SEM8-Codes/refs/heads/master/LP-V/DL/4.Google_Stock_Price/Google_Stock_Price.csv"
+ PATH = os.path.join(os.getcwd(), "data", "stock.csv")
+
+
+ def download_data():
+     """
+     Download the Google Stock Price dataset.
+
+     Returns:
+         str: Path to the downloaded dataset file
+
+     Raises:
+         RuntimeError: If the download fails
+     """
+     if os.path.exists(PATH):
+         print("Already exists")
+         return PATH
+
+     os.makedirs(os.path.dirname(PATH), exist_ok=True)
+
+     try:
+         r = requests.get(URL, timeout=60)  # timeout so a dead connection cannot hang forever
+         r.raise_for_status()
+         with open(PATH, "wb") as f:
+             f.write(r.content)
+         print(f"Downloaded to {PATH}")
+         return PATH
+     except requests.RequestException as e:
+         raise RuntimeError(f"Failed to download dataset: {e}") from e
@@ -0,0 +1,55 @@
+ # 1. Import Libraries
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+
+ from sklearn.preprocessing import MinMaxScaler
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Dense, SimpleRNN
+ # 2. Load Dataset
+ df = pd.read_csv("Google_Stock_Price.csv", thousands=',')
+
+ # Take the Open column; coerce non-numeric junk (e.g. stray 'GOOGL' rows) to NaN and drop it
+ data = pd.to_numeric(df['Open'], errors='coerce').dropna().values.reshape(-1, 1)
+ scaler = MinMaxScaler(feature_range=(0, 1))
+ data_scaled = scaler.fit_transform(data)
+ train_size = int(len(data_scaled) * 0.8)
+
+ train_data = data_scaled[:train_size]
+ test_data = data_scaled[train_size:]
+ def create_dataset(dataset):
+     X = []
+     y = []
+
+     for i in range(60, len(dataset)):
+         X.append(dataset[i-60:i, 0])
+         y.append(dataset[i, 0])
+
+     return np.array(X), np.array(y)
+
+ X_train, y_train = create_dataset(train_data)
+ X_test, y_test = create_dataset(test_data)
+ X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
+ X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
+ model = Sequential()
+
+ model.add(SimpleRNN(50, return_sequences=True, input_shape=(60, 1)))
+ model.add(SimpleRNN(50))
+ model.add(Dense(1))
+
+ model.compile(optimizer='adam', loss='mean_squared_error')
+
+ model.summary()
+ model.fit(X_train, y_train, epochs=20, batch_size=32)
+ predicted = model.predict(X_test)
+
+ predicted = scaler.inverse_transform(predicted)
+ real = scaler.inverse_transform(y_test.reshape(-1, 1))
+ plt.plot(real, color='red', label='Real Price')
+ plt.plot(predicted, color='blue', label='Predicted Price')
+
+ plt.title("Google Stock Price Prediction (RNN)")
+ plt.xlabel("Time")
+ plt.ylabel("Price")
+ plt.legend()
+ plt.show()
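For intuition: create_dataset builds supervised pairs from a sliding 60-step window, where X[i] holds 60 consecutive scaled prices and y[i] is the price that follows. The same idea on a toy series with a window of 3 instead of 60:

    series = [10, 11, 12, 13, 14]
    X = [series[i-3:i] for i in range(3, len(series))]   # [[10, 11, 12], [11, 12, 13]]
    y = [series[i] for i in range(3, len(series))]       # [13, 14]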