bigdata-helper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bigdata-helper might be problematic. Click here for more details.

@@ -0,0 +1,4 @@
1
+ from .codes import get_code, list_codes # re-export for simple import surface
2
+
3
+ __all__ = ["get_code", "list_codes"]
4
+ __version__ = "0.1.0"
@@ -0,0 +1,170 @@
1
+
2
+ from typing import Dict, List
3
+
4
def mini_code() -> str:
    """Return the source of the "mini" practical as a string.

    The snippet benchmarks several scikit-learn regressors on a graduate
    admissions CSV, prints an R2/RMSE summary table, and prints one sample
    prediction. Nothing is imported or executed here; the code is only
    returned as text, so pandas/numpy/scikit-learn are not needed to call
    this function.

    Returns:
        The snippet text, starting with a newline and ending with a newline.
    """
    # The snippet is kept at column 0 inside the literal so the returned text
    # has correct block indentation and can be pasted or compiled as-is.
    # "\\n" is deliberate: it yields a literal backslash-n in the snippet.
    return """
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error

def load_data(path):
    data = pd.read_csv(path)
    X = data[['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR', 'CGPA', 'Research']]
    y = data['Chance of Admit']
    return X, y

def build_pipeline(model):
    return Pipeline([
        ('scaler', StandardScaler()),
        ('model', model)
    ])

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    return r2, rmse

def main():
    X, y = load_data("Synthetic_Graduate_Admissions.csv")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    models = {
        "Linear Regression": LinearRegression(),
        "Decision Tree": DecisionTreeRegressor(random_state=42),
        "Random Forest": RandomForestRegressor(random_state=42),
        "Support Vector Regressor": SVR(kernel='rbf'),
        "KNN Regressor": KNeighborsRegressor(n_neighbors=5)
    }

    results = []
    for name, model in models.items():
        pipe = build_pipeline(model)
        pipe.fit(X_train, y_train)
        r2, rmse = evaluate_model(pipe, X_test, y_test)
        results.append({"Model": name, "R2 Score": round(r2, 3), "RMSE": round(rmse, 3)})

    results_df = pd.DataFrame(results)
    print("\\nModel Performance Summary:")
    print(results_df.to_string(index=False))

    sample = pd.DataFrame([[320, 110, 4, 4.5, 4.0, 9.0, 1]],
                          columns=['GRE Score', 'TOEFL Score', 'University Rating', 'SOP', 'LOR', 'CGPA', 'Research'])
    best_model = build_pipeline(RandomForestRegressor(random_state=42))
    best_model.fit(X, y)
    pred = best_model.predict(sample)
    print(f"\\nPredicted Chance of Admission: {pred[0]*100:.2f}%")

if __name__ == "__main__":
    main()
"""
69
+
70
def forestfire_code() -> str:
    """Return the source of the "forestfire" practical as a string.

    The snippet loads ``forestfires.csv``, stores it in a SQLite database,
    runs a MapReduce-style monthly temperature average with
    ``multiprocessing.Pool``, and prints grouped, correlation and SQL
    summaries. Nothing is imported or executed here; the code is only
    returned as text.

    Returns:
        The snippet text, starting with a newline and ending with a newline.
    """
    # The snippet is kept at column 0 inside the literal so the returned text
    # has correct block indentation and can be pasted or compiled as-is.
    # "\\n" is deliberate: it yields a literal backslash-n in the snippet.
    return """
from multiprocessing import Pool
import pandas as pd
import sqlite3


def mapper(row):
    return (row["Month"], row["Temperature_Celsius"])


def reducer(mapped_data):
    result = {}
    for month, temp in mapped_data:
        result.setdefault(month, []).append(temp)
    return {m: sum(v) / len(v) for m, v in result.items()}


def run_mapreduce(df):
    with Pool() as p:
        mapped = p.map(mapper, [row for _, row in df.iterrows()])
    reduced = reducer(mapped)

    print("\\nAverage Temperature per Month:")
    for m, t in reduced.items():
        print(f"{m}: {t:.2f}")
    return reduced


def top_fire_months(df, top_n=5):
    top = df.groupby("Month")["Burned_Area_hectares"].mean().sort_values(ascending=False).head(top_n)
    print(f"\\nTop {top_n} Months with Largest Fire Area:\\n{top}\\n")
    return top


def temperature_area_correlation(df):
    corr = df["Temperature_Celsius"].corr(df["Burned_Area_hectares"])
    print(f"Correlation between Temperature and Fire Area: {corr:.2f}")
    return corr


def query_avg_area_by_month(conn):
    query = '''
        SELECT Month, AVG(Burned_Area_hectares) AS avg_area
        FROM forestfires
        GROUP BY Month
        ORDER BY avg_area DESC;
    '''
    result = pd.read_sql_query(query, conn)
    print("\\nAverage Burned Area by Month (from SQL):")
    print(result)
    return result


def run_pipeline():
    print("=== Forest Fire Analysis Pipeline Started ===\\n")

    df = pd.read_csv("forestfires.csv")
    print(f"Loaded dataset with {len(df)} rows and {len(df.columns)} columns.")

    conn = sqlite3.connect("forestfires.db")
    try:
        df.to_sql("forestfires", conn, if_exists="replace", index=False)
        print("Data saved to SQLite database.\\n")

        run_mapreduce(df)
        top_fire_months(df)
        temperature_area_correlation(df)
        query_avg_area_by_month(conn)
    finally:
        # Always release the database handle, even if a step fails.
        conn.close()

    print("\\n=== Pipeline Completed Successfully ===")


if __name__ == "__main__":
    run_pipeline()
"""
145
+
146
def placeholder_code() -> str:
    """Return the one-line stub comment used as the "placeholder" snippet."""
    stub = "# Add your next practical code here..."
    return stub
148
+
149
def get_code_map() -> Dict[str, str]:
    """Build the name -> snippet-text mapping for every stored practical.

    Each call invokes the snippet functions again and returns a fresh dict
    with the keys "mini", "forestfire" and "placeholder", in that order.
    """
    providers = (
        ("mini", mini_code),
        ("forestfire", forestfire_code),
        ("placeholder", placeholder_code),
    )
    return {label: produce() for label, produce in providers}
155
+
156
def get_code(name: str) -> str:
    """
    Look up a stored code snippet by its name.

    The name is matched after stripping surrounding whitespace and
    lower-casing; a falsy name is treated as the empty string.
    Available names: see list_codes().

    Raises:
        KeyError: if the normalized name is not a registered snippet; the
            message lists the available names in sorted order.
    """
    lookup = (name or "").strip().lower()
    snippets = get_code_map()
    if lookup in snippets:
        return snippets[lookup]
    choices = ", ".join(sorted(snippets.keys()))
    raise KeyError(f"Code '{name}' not found. Try one of: {choices}")
167
+
168
def list_codes() -> List[str]:
    """Return the names of all available code snippets, sorted."""
    # Iterating a dict yields its keys, so sorted() on the mapping is enough.
    return sorted(get_code_map())
@@ -0,0 +1,49 @@
1
+ Metadata-Version: 2.4
2
+ Name: bigdata-helper
3
+ Version: 0.1.0
4
+ Summary: A Python library containing Big Data practical codes for quick access
5
+ Author: Satyam Kale
6
+ License: MIT
7
+ Project-URL: Homepage, https://pypi.org/project/bigdata-helper/
8
+ Keywords: big data,exam,codes,snippets,education
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.8
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Dynamic: license-file
16
+
17
+ # bigdata-helper
18
+
19
+ **Version:** 0.1.0
20
+ **Author:** Satyam Kale
21
+
22
+ A lightweight library that stores ready-to-use Big Data practical codes and lets you retrieve them instantly as strings.
23
+
24
+ > ⚠️ This package is intended for learning, practice, and revision. Use responsibly and follow your institution's academic policies.
25
+
26
+ ## Install
27
+
28
+ ```bash
29
+ pip install bigdata-helper
30
+ ```
31
+
32
+ ## Quick Start
33
+
34
+ ```python
35
+ from bigdata_helper import get_code, list_codes
36
+
37
+ print(list_codes())
38
+ print(get_code("mini"))
39
+ print(get_code("forestfire"))
40
+ ```
41
+
42
+ ## Available Codes
43
+
44
+ - `mini` — Multiple regressors benchmark on a synthetic Graduate Admissions dataset (prints model performance and a sample prediction).
45
+ - `forestfire` — MapReduce-like analysis pipeline for a forest fire dataset with SQLite, correlation, and monthly summaries.
46
+
47
+ ## Add More
48
+
49
+ You can contribute more codes by adding functions to `bigdata_helper/codes.py` and mapping them in `get_code_map()`.
@@ -0,0 +1,7 @@
1
+ bigdata_helper/__init__.py,sha256=lGx6yZpbbcrAzfYykkn98UTlHBm0eKdJtY97JVi3ugo,139
2
+ bigdata_helper/codes.py,sha256=p3iBqSuJDFpTLJUCrlgSK_ZvAWbGYWX0bNWTo28kbE8,5112
3
+ bigdata_helper-0.1.0.dist-info/licenses/LICENSE,sha256=iTPTuqITG-wTuMlwnmMMWhrGaLkb82N_j9bmaTSvRSc,1068
4
+ bigdata_helper-0.1.0.dist-info/METADATA,sha256=_0sX6mHgvrVmcpkUfq-Vl174VytQ21YeLsiwGdJyy7E,1530
5
+ bigdata_helper-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ bigdata_helper-0.1.0.dist-info/top_level.txt,sha256=tVWTBB3_zxa1Y6Rk2rFzjTI-3MrOsymgCaDVBw8n5AQ,15
7
+ bigdata_helper-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Satyam Kale
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ bigdata_helper