jrpybestpracccc 0.1.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. jrpybestpracccc-0.1.1/PKG-INFO +14 -0
  2. jrpybestpracccc-0.1.1/jrpybestpracccc/__init__.py +3 -0
  3. jrpybestpracccc-0.1.1/jrpybestpracccc/__version__.py +1 -0
  4. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/bad_project/.gitignore +3 -0
  5. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/bad_project/data/.gitignore +0 -0
  6. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/bad_project/experiment.ipynb +193 -0
  7. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/bad_project/setup.cfg +7 -0
  8. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/.gitignore +3 -0
  9. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/data/.gitignore +0 -0
  10. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/experiment.ipynb +1024 -0
  11. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/setup.cfg +7 -0
  12. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/__init__.py +0 -0
  13. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/config/filenames.py +3 -0
  14. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/config/theme.py +147 -0
  15. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/data/__init__.py +2 -0
  16. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/data/preprocessing.py +341 -0
  17. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/data/storage.py +99 -0
  18. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/modelling/__init__.py +4 -0
  19. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/modelling/ingredients.py +58 -0
  20. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/modelling/predictions.py +18 -0
  21. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/modelling/scoring.py +18 -0
  22. jrpybestpracccc-0.1.1/jrpybestpracccc/extra_scripts/example_projects/good_project/src/modelling/training.py +42 -0
  23. jrpybestpracccc-0.1.1/pyproject.toml +15 -0
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.1
2
+ Name: jrpybestpracccc
3
+ Version: 0.1.1
4
+ Summary: Jumping Rivers: Python Best Practices
5
+ Author: Jumping Rivers
6
+ Author-email: info@jumpingrivers.com
7
+ Requires-Python: >=3.10
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: jrpybestprac (>=0.1.3)
13
+ Requires-Dist: plotly (>=6.0.0)
14
+ Requires-Dist: scikit-learn (>=1.1.1)
@@ -0,0 +1,3 @@
1
+ from .__version__ import __version__ # noqa: F401
2
+
3
+ from . import vignettes # noqa: F401
@@ -0,0 +1 @@
1
+ __version__ = "0.1.1"
@@ -0,0 +1,193 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "051b5e69",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Experimental Notebook"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "ce57b322",
14
+ "metadata": {},
15
+ "source": [
16
+ "## Data loading"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 2,
22
+ "id": "b155d9e6",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "import pandas as pd\n",
27
+ "\n",
28
+ "\n",
29
+ "# Load raw data\n",
30
+ "archetype_df = pd.read_csv(\n",
31
+ " \"data/Baseline archetype properties.csv\"\n",
32
+ ")\n",
33
+ "\n",
34
+ "# Skip the first 4 rows as they contain metadata or headers not relevant to the DataFrame\n",
35
+ "fixed_capex_df = pd.read_csv(\n",
36
+ " \"data/Input Baseline capex fixed.csv\",\n",
37
+ " skiprows=4,\n",
38
+ ")\n",
39
+ "# Skip the first 4 rows as they contain metadata or headers not relevant to the DataFrame\n",
40
+ "capex_by_power_df = pd.read_csv(\n",
41
+ " \"data/Input Baseline capex by power.csv\",\n",
42
+ " skiprows=4,\n",
43
+ ")"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "markdown",
48
+ "id": "ef90ee7d",
49
+ "metadata": {},
50
+ "source": [
51
+ "## Data preprocessing"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 3,
57
+ "id": "5d787f54",
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "# Preprocess data\n",
62
+ "fixed_capex_df = fixed_capex_df.merge(\n",
63
+ " archetype_df,\n",
64
+ " left_on=\"Heating system\",\n",
65
+ " right_on=\"Heating system\",\n",
66
+ " how=\"inner\",\n",
67
+ ")\n",
68
+ "\n",
69
+ "capex_by_power_df = capex_by_power_df.merge(\n",
70
+ " archetype_df,\n",
71
+ " left_on=\"Heating system\",\n",
72
+ " right_on=\"Heating system\",\n",
73
+ " how=\"inner\",\n",
74
+ ")\n",
75
+ "\n",
76
+ "year_cols = [col for col in capex_by_power_df.columns if col.startswith(\"20\")]\n",
77
+ "\n",
78
+ "capex_by_power_df[year_cols] = capex_by_power_df[year_cols].multiply(\n",
79
+ " capex_by_power_df[\"Heating system size\"], axis=0\n",
80
+ ")\n",
81
+ "\n",
82
+ "all_capex_df =pd.concat([capex_by_power_df, fixed_capex_df])\n",
83
+ "\n",
84
+ "year_cols = [col for col in all_capex_df.columns if col.startswith(\"20\")]\n",
85
+ "agg_dict = {col: 'first' for col in all_capex_df.columns if col not in year_cols}\n",
86
+ "agg_dict.update({col: 'sum' for col in year_cols})\n",
87
+ "\n",
88
+ "all_capex_df = all_capex_df.groupby(by=\"Constrained archetype number\", as_index=False).agg(agg_dict)\n",
89
+ "\n",
90
+ "columns_to_drop = [\n",
91
+ " \"Cost type variant\",\n",
92
+ " \"Assumptions\",\n",
93
+ " \"Heating system\",\n",
94
+ "]\n",
95
+ "\n",
96
+ "all_capex_df = all_capex_df.drop(columns=columns_to_drop)\n",
97
+ "\n",
98
+ "all_capex_df[\"Variable unit\"] = \"£\"\n",
99
+ "\n",
100
+ "all_capex_df[\"Data name\"] = \"Baseline capex per home by renewal year\"\n",
101
+ "\n",
102
+ "all_capex_df.to_csv(\"data/Baseline heating system capex.csv\", index=False)"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "markdown",
107
+ "id": "4e058f29",
108
+ "metadata": {},
109
+ "source": [
110
+ "## Data modelling"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 4,
116
+ "id": "0cd24f13",
117
+ "metadata": {},
118
+ "outputs": [
119
+ {
120
+ "data": {
121
+ "text/plain": [
122
+ "638.7722752913661"
123
+ ]
124
+ },
125
+ "execution_count": 4,
126
+ "metadata": {},
127
+ "output_type": "execute_result"
128
+ }
129
+ ],
130
+ "source": [
131
+ "from sklearn.linear_model import LinearRegression\n",
132
+ "from sklearn.metrics import mean_absolute_error\n",
133
+ "from sklearn.model_selection import train_test_split\n",
134
+ "from sklearn.pipeline import Pipeline\n",
135
+ "from sklearn.preprocessing import OneHotEncoder\n",
136
+ "\n",
137
+ "\n",
138
+ "# Data extraction\n",
139
+ "target = \"2020\"\n",
140
+ "predictors = [\n",
141
+ " \"Tenure\",\n",
142
+ " \"Region\",\n",
143
+ " \"Fuel poverty\",\n",
144
+ " \"Property type\",\n",
145
+ "]\n",
146
+ "\n",
147
+ "y = all_capex_df[target]\n",
148
+ "X = all_capex_df[predictors]\n",
149
+ "\n",
150
+ "# Train-test split\n",
151
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
152
+ " X, y, test_size=0.2\n",
153
+ ")\n",
154
+ "\n",
155
+ "# Construct the model pipeline\n",
156
+ "model = Pipeline(\n",
157
+ " [\n",
158
+ " (\"transform\", OneHotEncoder(drop=\"first\")),\n",
159
+ " (\"model\", LinearRegression()),\n",
160
+ " ]\n",
161
+ ")\n",
162
+ "\n",
163
+ "# Fit the model\n",
164
+ "model.fit(X_train, y_train)\n",
165
+ "\n",
166
+ "# Test the model\n",
167
+ "y_pred = model.predict(X_test)\n",
168
+ "mean_absolute_error(y_test, y_pred)"
169
+ ]
170
+ }
171
+ ],
172
+ "metadata": {
173
+ "kernelspec": {
174
+ "display_name": "Python 3 (ipykernel)",
175
+ "language": "python",
176
+ "name": "python3"
177
+ },
178
+ "language_info": {
179
+ "codemirror_mode": {
180
+ "name": "ipython",
181
+ "version": 3
182
+ },
183
+ "file_extension": ".py",
184
+ "mimetype": "text/x-python",
185
+ "name": "python",
186
+ "nbconvert_exporter": "python",
187
+ "pygments_lexer": "ipython3",
188
+ "version": "3.10.12"
189
+ }
190
+ },
191
+ "nbformat": 4,
192
+ "nbformat_minor": 5
193
+ }
@@ -0,0 +1,7 @@
1
+ [flake8]
2
+ max-line-length = 88
3
+ per-file-ignores = [
4
+ utils/data/__init__.py:F401,
5
+ utils/modelling/__init__.py:F401,
6
+ utils/deployment/__init__.py:F401,
7
+ ]