edmt 1.0.1.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edmt-1.0.1.dev0/LICENSE +21 -0
- edmt-1.0.1.dev0/PKG-INFO +26 -0
- edmt-1.0.1.dev0/README.md +3 -0
- edmt-1.0.1.dev0/edmt/__init__.py +78 -0
- edmt-1.0.1.dev0/edmt/analysis/__init__.py +7 -0
- edmt-1.0.1.dev0/edmt/analysis/analysis.py +2 -0
- edmt-1.0.1.dev0/edmt/base/__init__.py +5 -0
- edmt-1.0.1.dev0/edmt/base/base.py +2 -0
- edmt-1.0.1.dev0/edmt/contrib/__init__.py +16 -0
- edmt-1.0.1.dev0/edmt/contrib/utils.py +146 -0
- edmt-1.0.1.dev0/edmt/conversion/__init__.py +19 -0
- edmt-1.0.1.dev0/edmt/conversion/computational.py +2 -0
- edmt-1.0.1.dev0/edmt/conversion/conversion.py +302 -0
- edmt-1.0.1.dev0/edmt/mapping/__init__.py +7 -0
- edmt-1.0.1.dev0/edmt/mapping/mapping.py +207 -0
- edmt-1.0.1.dev0/edmt/mapping/maps.py +77 -0
- edmt-1.0.1.dev0/edmt/models/__init__.py +15 -0
- edmt-1.0.1.dev0/edmt/models/drones.py +533 -0
- edmt-1.0.1.dev0/edmt/plotting/__init__.py +0 -0
- edmt-1.0.1.dev0/edmt.egg-info/PKG-INFO +26 -0
- edmt-1.0.1.dev0/edmt.egg-info/SOURCES.txt +26 -0
- edmt-1.0.1.dev0/edmt.egg-info/dependency_links.txt +1 -0
- edmt-1.0.1.dev0/edmt.egg-info/entry_points.txt +2 -0
- edmt-1.0.1.dev0/edmt.egg-info/requires.txt +9 -0
- edmt-1.0.1.dev0/edmt.egg-info/top_level.txt +1 -0
- edmt-1.0.1.dev0/pyproject.toml +39 -0
- edmt-1.0.1.dev0/setup.cfg +4 -0
- edmt-1.0.1.dev0/tests/test_airdata.py +5 -0
edmt-1.0.1.dev0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 envdmt
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
edmt-1.0.1.dev0/PKG-INFO
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: edmt
|
|
3
|
+
Version: 1.0.1.dev0
|
|
4
|
+
Summary: Environmental Data Management Toolbox
|
|
5
|
+
Author-email: "Odero, Kuloba & musasia" <franodex10@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/envqwewdmt/EDMT
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: contextily>=1.4.0
|
|
14
|
+
Requires-Dist: contourpy>=1.2.1
|
|
15
|
+
Requires-Dist: fiona==1.9.6
|
|
16
|
+
Requires-Dist: folium>=0.18.0
|
|
17
|
+
Requires-Dist: geopandas>=0.12.2
|
|
18
|
+
Requires-Dist: mapclassify>=2.8.0
|
|
19
|
+
Requires-Dist: plotly>=5.24.1
|
|
20
|
+
Requires-Dist: seaborn>=0.13.2
|
|
21
|
+
Requires-Dist: tqdm>=4
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# edmt
|
|
25
|
+
|
|
26
|
+
### Documentation
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from edmt import (
|
|
2
|
+
analysis,
|
|
3
|
+
base,
|
|
4
|
+
contrib,
|
|
5
|
+
conversion,
|
|
6
|
+
mapping,
|
|
7
|
+
models,
|
|
8
|
+
plotting
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
import importlib.metadata
|
|
12
|
+
|
|
13
|
+
# ASCII banner printed by init() on successful initialization.
ASCII = r"""
___ ___ __ __ _____
| __| \| \/ |_ _|
| _|| |) | |\/| | | |
|___|___/|_| |_| |_|
"""

# Module-level guard so init()'s side effects run only once per process
# (unless init(force=True) is called).
__initialized = False

# Package version
# Resolved from the installed distribution's metadata rather than hard-coded,
# so it always matches what pip installed.
__version__ = importlib.metadata.version("edmt")
|
|
24
|
+
|
|
25
|
+
def init(silent=False, force=False):
    """
    Initializes the environment with EDMT-specific customizations.

    Configures pandas display/plotting options, registers tqdm's pandas
    integration, silences known-noisy warnings, and sets the default plotly
    template. The heavy imports (pandas, tqdm, shapely, plotly) are deferred
    to call time so that ``import edmt`` itself stays cheap.

    Parameters
    ----------
    silent : bool, optional
        Suppresses console output (default is False).
    force : bool, optional
        Forces re-initialization even if already initialized (default is False).
    """
    global __initialized
    # Idempotent by default: bail out unless the caller forces a re-run.
    if __initialized and not force:
        if not silent:
            print("EDMT already initialized.")
        return

    import pandas as pd

    # Show all columns, plot through plotly, and opt in to copy-on-write.
    pd.set_option("display.max_columns", None)
    pd.options.plotting.backend = "plotly"
    pd.options.mode.copy_on_write = True

    from tqdm.auto import tqdm

    # Enables DataFrame.progress_apply / progress_map.
    tqdm.pandas()

    import warnings

    from shapely.errors import ShapelyDeprecationWarning

    # Suppress known-noisy warnings; the Parquet one is matched by message text.
    warnings.filterwarnings(action="ignore", category=ShapelyDeprecationWarning)
    warnings.filterwarnings(action="ignore", category=FutureWarning)
    warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*")

    import plotly.io as pio  # type: ignore[import-untyped]

    pio.templates.default = "seaborn"

    __initialized = True
    if not silent:
        print(ASCII)
        print("EDMT initialized successfully.")
|
|
68
|
+
|
|
69
|
+
# Names re-exported by `from edmt import *`: the subpackages imported at the
# top of this module plus the init() entry point.
__all__ = [
    "analysis",
    "base",
    "contrib",
    "init",
    "conversion",
    "mapping",
    "models",
    "plotting"
]
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from dateutil import parser
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import geopandas as gpd
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from typing import Union, List
|
|
8
|
+
|
|
9
|
+
def clean_vars(addl_kwargs=None, **kwargs):
    """
    Merge keyword arguments into a single dict, dropping None values.

    Parameters
    ----------
    addl_kwargs : dict, optional
        Extra, non-standard parameters. Each key triggers a console warning
        but is still included in the result (unless its value is None).
    **kwargs
        Standard parameters; override same-named keys from ``addl_kwargs``.

    Returns
    -------
    dict
        All provided key/value pairs whose value is not None.
    """
    # BUG FIX: the default was the mutable literal `{}`, shared across calls.
    if addl_kwargs is None:
        addl_kwargs = {}
    for k in addl_kwargs:
        print(f"Warning: {k} is a non-standard parameter. Results may be unexpected.")
    # kwargs is merged second so standard parameters win on key collisions.
    return {k: v for k, v in {**addl_kwargs, **kwargs}.items() if v is not None}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def normalize_column(df, col):
    """
    Flatten a column of nested dicts into `col__key` columns, in place.

    Removes ``col`` from *df* and adds one column per (possibly nested) key,
    with nesting levels joined by ``__`` and the original column name as a
    prefix. Returns None; *df* is modified directly.
    """
    flattened = pd.json_normalize(df.pop(col), sep="__")
    flattened = flattened.add_prefix(f"{col}__")
    for name in flattened.columns:
        df[name] = flattened[name].values
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def clean_time_cols(df,columns = []):
|
|
23
|
+
if columns:
|
|
24
|
+
time_cols = [columns]
|
|
25
|
+
for col in time_cols:
|
|
26
|
+
if col in df.columns and not pd.api.types.is_datetime64_ns_dtype(df[col]):
|
|
27
|
+
# convert x is not None to pd.isna(x) is False
|
|
28
|
+
df[col] = df[col].apply(lambda x: pd.to_datetime(parser.parse(x), utc=True) if not pd.isna(x) else None)
|
|
29
|
+
return df
|
|
30
|
+
else:
|
|
31
|
+
print("Select a column with Time format")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def format_iso_time(date_string: str) -> str:
    """
    Return *date_string* re-formatted as an ISO-8601 timestamp.

    Parameters
    ----------
    date_string : str
        Any timestamp representation pandas can parse.

    Returns
    -------
    str
        The timestamp in ISO-8601 form (e.g. "2024-01-02T03:04:05").

    Raises
    ------
    ValueError
        If the string cannot be parsed as a timestamp.
    """
    try:
        return pd.to_datetime(date_string).isoformat()
    except ValueError as err:
        # BUG FIX: the original message lacked the space before the quoted
        # value ("timestamp'..."); also chain the cause for debuggability.
        raise ValueError(f"Failed to parse timestamp '{date_string}'") from err
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def norm_exp(df: pd.DataFrame, cols: Union[str, list]) -> pd.DataFrame:
    """
    Normalizes specified columns containing list of dicts,
    expands them into separate rows if needed,
    and appends new columns to the original dataframe with prefixing.

    Parameters:
    - df: Original pandas DataFrame
    - cols: str or list of str, names of columns to normalize

    Returns:
    - Modified DataFrame with normalized and expanded data

    Raises:
    - ValueError: if a requested column is not present in *df*.
    """
    if isinstance(cols, str):
        cols = [cols]

    result_df = df.copy()
    for col in cols:
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame.")

        s = df[col]
        # One sub-DataFrame per row; empty frame for rows with no list data.
        normalized = s.apply(lambda x: pd.json_normalize(x) if isinstance(x, list) and x else pd.DataFrame())

        def add_prefix(df_sub, prefix):
            # BUG FIX: the original assigned to `df_sub.cols` — a plain
            # attribute, not the DataFrame's columns — so the prefix was
            # never actually applied.
            df_sub.columns = [f"{prefix}_{subcol}" for subcol in df_sub.columns]
            return df_sub

        normalized = normalized.map(lambda df_sub: add_prefix(df_sub, col))
        # Stack per-row frames; 'original_index' remembers the source row so
        # expanded rows can be joined back onto the original frame.
        normalized_stacked = (
            pd.concat(normalized.tolist(), keys=df.index)
            .reset_index(level=1, drop=True)
            .rename_axis('original_index')
            .reset_index()
        )
        result_df = result_df.drop(columns=[col], errors='ignore')

    # NOTE(review): when multiple columns are given, only the last loop
    # iteration's `normalized_stacked` is merged back — earlier columns are
    # dropped without expansion. Confirm whether multi-column input is
    # actually intended (a later duplicate definition in this module shares
    # the same shape).
    return result_df.merge(
        normalized_stacked,
        left_index=True,
        right_on='original_index',
        how='left'
    ).drop(columns=['original_index'])
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def append_cols(df: pd.DataFrame, cols: Union[str, list]):
    """
    Return *df* with the given column(s) moved to the end.

    Parameters:
        df (pd.DataFrame): Input DataFrame.
        cols (str or list): Column name(s) to move to the end.

    Returns:
        pd.DataFrame: A view of *df* with the columns reordered.
    """
    tail = [cols] if isinstance(cols, str) else cols
    head = [name for name in df.columns if name not in tail]
    return df[head + tail]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def norm_exp(df: pd.DataFrame,
             cols: Union[str, list]
             ) -> pd.DataFrame:
    """
    Expand columns holding lists of dicts into prefixed flat columns.

    Each affected row may produce several output rows (one per dict in the
    list); the new columns are named ``<col>_<key>`` and merged back onto a
    copy of the original frame.

    Parameters:
    - df: Original pandas DataFrame
    - cols: str or list of str, names of columns to normalize

    Returns:
    - Modified DataFrame with normalized and expanded data

    NOTE(review): this is the second definition of `norm_exp` in the module
    and shadows the earlier one; the module keeps this (later) version.
    """
    target_cols = [cols] if isinstance(cols, str) else cols

    out = df.copy()
    for name in target_cols:
        if name not in df.columns:
            raise ValueError(f"Column '{name}' not found in DataFrame.")

        def _expand(cell):
            # Non-empty lists become one sub-frame; everything else is empty.
            if isinstance(cell, list) and cell:
                return pd.json_normalize(cell)
            return pd.DataFrame()

        frames = df[name].apply(_expand)

        def _prefixed(frame):
            frame.columns = [f"{name}_{sub}" for sub in frame.columns]
            return frame

        frames = frames.map(_prefixed)
        # Stack the per-row frames, remembering the originating row index.
        stacked = (
            pd.concat(frames.tolist(), keys=df.index)
            .reset_index(level=1, drop=True)
            .rename_axis('original_index')
            .reset_index()
        )
        out = out.drop(columns=[name], errors='ignore')

    return out.merge(
        stacked,
        left_index=True,
        right_on='original_index',
        how='left'
    ).drop(columns=['original_index'])
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .conversion import (
|
|
2
|
+
sdf_to_gdf,
|
|
3
|
+
generate_uuid,
|
|
4
|
+
get_utm_epsg,
|
|
5
|
+
to_gdf,
|
|
6
|
+
convert_distance,
|
|
7
|
+
convert_time,
|
|
8
|
+
convert_speed
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
'sdf_to_gdf',
|
|
13
|
+
'generate_uuid',
|
|
14
|
+
'get_utm_epsg',
|
|
15
|
+
'to_gdf',
|
|
16
|
+
'convert_distance',
|
|
17
|
+
'convert_time',
|
|
18
|
+
'convert_speed'
|
|
19
|
+
]
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
import uuid
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import geopandas as gpd
|
|
4
|
+
from shapely import make_valid
|
|
5
|
+
from edmt.contrib.utils import (
|
|
6
|
+
clean_vars
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
A unit of time is any particular time interval, used as a standard way of measuring or
|
|
11
|
+
expressing duration. The base unit of time in the International System of Units (SI),
|
|
12
|
+
and by extension most of the Western world, is the second, defined as about 9 billion
|
|
13
|
+
oscillations of the caesium atom.
|
|
14
|
+
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
time_chart: dict[str, float] = {
|
|
18
|
+
"microseconds": 0.000001, # 1 μs = 1e-6 seconds
|
|
19
|
+
"microsecond": 0.000001,
|
|
20
|
+
"µs": 0.000001,
|
|
21
|
+
"milliseconds": 0.001, # 1 ms = 1e-3 seconds
|
|
22
|
+
"millisecond": 0.001,
|
|
23
|
+
"ms": 0.001,
|
|
24
|
+
"seconds": 1.0, # Base unit
|
|
25
|
+
"second": 1.0,
|
|
26
|
+
"s": 1.0,
|
|
27
|
+
"minutes": 60.0, # 1 min = 60 sec
|
|
28
|
+
"minute": 60.0,
|
|
29
|
+
"min": 60.0,
|
|
30
|
+
"m": 60.0,
|
|
31
|
+
"hours": 3600.0, # 1 hr = 60 min = 3600 sec
|
|
32
|
+
"hour": 3600.0,
|
|
33
|
+
"hr": 3600.0,
|
|
34
|
+
"h": 3600.0,
|
|
35
|
+
"days": 86400.0, # 1 day = 24 hr = 86400 sec
|
|
36
|
+
"day": 86400.0,
|
|
37
|
+
"d": 86400.0,
|
|
38
|
+
"weeks": 604800.0, # 1 week = 7 days = 604800 sec
|
|
39
|
+
"week": 604800.0,
|
|
40
|
+
"wk": 604800.0,
|
|
41
|
+
"w": 604800.0,
|
|
42
|
+
"months": 2629800.0, # Approx. 30.44 days = 1/12 year
|
|
43
|
+
"month": 2629800.0,
|
|
44
|
+
"years": 31557600.0, # Julian year = 365.25 days
|
|
45
|
+
"year": 31557600.0,
|
|
46
|
+
"yr": 31557600.0,
|
|
47
|
+
"y": 31557600.0,
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
time_chart_inverse: dict[str, float] = {
|
|
51
|
+
key: 1.0 / value for key, value in time_chart.items()
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
speed_chart: dict[str, float] = {
|
|
55
|
+
"km/h": 1.0,
|
|
56
|
+
"m/s": 3.6,
|
|
57
|
+
"mph": 1.609344,
|
|
58
|
+
"knot": 1.852,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
speed_chart_inverse: dict[str, float] = {
|
|
62
|
+
"km/h": 1.0,
|
|
63
|
+
"m/s": 0.277777778,
|
|
64
|
+
"mph": 0.621371192,
|
|
65
|
+
"knot": 0.539956803,
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
UNIT_SYMBOL = {
|
|
69
|
+
"meter": "m", "meters": "m",
|
|
70
|
+
"kilometer": "km", "kilometers": "km",
|
|
71
|
+
"centimeter": "cm", "centimeters": "cm",
|
|
72
|
+
"millimeter": "mm", "millimeters": "mm",
|
|
73
|
+
"mile": "mi", "miles": "mi",
|
|
74
|
+
"yard": "yd", "yards": "yd",
|
|
75
|
+
"foot": "ft", "feet": "ft",
|
|
76
|
+
"inch": "in", "inches": "in",
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
METRIC_CONVERSION = {
|
|
80
|
+
"mm": -3,
|
|
81
|
+
"cm": -2,
|
|
82
|
+
"dm": -1,
|
|
83
|
+
"m": 0,
|
|
84
|
+
"dam": 1,
|
|
85
|
+
"hm": 2,
|
|
86
|
+
"km": 3,
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
distance_chart = {
|
|
90
|
+
"mm": 0.001,
|
|
91
|
+
"cm": 0.01,
|
|
92
|
+
"dm": 0.1,
|
|
93
|
+
"m": 1.0,
|
|
94
|
+
"dam": 10.0,
|
|
95
|
+
"hm": 100.0,
|
|
96
|
+
"km": 1000.0,
|
|
97
|
+
"in": 0.0254,
|
|
98
|
+
"ft": 0.3048,
|
|
99
|
+
"yd": 0.9144,
|
|
100
|
+
"mi": 1609.344,
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
def sdf_to_gdf(sdf, crs=None):
    """
    Convert an ArcGIS-style spatial DataFrame to a GeoDataFrame.

    Parameters
    ----------
    sdf : pd.DataFrame
        Spatial DataFrame carrying geometries in a 'SHAPE' column.
    crs : optional
        Coordinate Reference System for the result; defaults to EPSG:4326
        when not given.

    Returns
    -------
    gpd.GeoDataFrame
        Rows with missing geometry removed, invalid geometries repaired via
        shapely's make_valid, and the ArcGIS bookkeeping columns
        ('Shape__Area', 'Shape__Length', 'SHAPE') dropped.

    Raises
    ------
    ValueError
        If *sdf* is not a DataFrame, is empty, or lacks a 'SHAPE' column.
    """
    if not isinstance(sdf, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame.")
    if sdf.empty:
        # BUG FIX: the message previously talked about "generating UUIDs" —
        # a copy-paste from generate_uuid().
        raise ValueError("DataFrame is empty. Cannot convert an empty DataFrame.")

    shape_col = "SHAPE"
    drop_cols = ["Shape__Area", "Shape__Length", "SHAPE"]

    # BUG FIX: the previous implementation asserted that clean_vars() had
    # dropped the "geometry" key, but clean_vars() keeps every non-None
    # value, so the assert *always* failed and the function always crashed.
    # Validate the actual precondition (a SHAPE column) with a real error.
    if shape_col not in sdf.columns:
        raise ValueError(f"Column '{shape_col}' not found in DataFrame.")

    # Work on a copy so the caller's frame is untouched; drop rows with no
    # geometry before constructing the GeoDataFrame.
    tmp = sdf[~sdf[shape_col].isna()].copy()

    gdf = gpd.GeoDataFrame(
        tmp,
        geometry=tmp[shape_col],
        crs=crs if crs else 4326,  # default to WGS84
    )
    # Repair self-intersections etc. so downstream spatial ops don't fail.
    gdf['geometry'] = gdf['geometry'].apply(make_valid)
    gdf.drop(columns=drop_cols, errors='ignore', inplace=True)
    print("Converted Spatial DataFrame to GeoDataFrame")
    return gdf
|
|
152
|
+
|
|
153
|
+
def generate_uuid(df, index=False):
    """
    Ensure *df* has a column of UUID strings.

    If any existing string column already holds only UUID-formatted values,
    no new UUIDs are generated. Otherwise a 'uuid' column is added (or its
    missing entries filled) with fresh lowercase UUID4 strings.

    Args:
        df (pd.DataFrame): The DataFrame to which UUIDs will be added.
        index (bool): If True, moves the UUID column to the front via a
            set_index/reset_index round-trip; otherwise it stays in place.

    Returns:
        pd.DataFrame: The frame with a UUID column available.

    Raises:
        ValueError: If *df* is not a DataFrame or is empty.
    """
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame.")
    if df.empty:
        raise ValueError("DataFrame is empty. Cannot generate UUIDs for an empty DataFrame.")

    # Canonical lowercase UUID shape: 8-4-4-4-12 hex digits.
    pattern = r'^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$'

    def _looks_like_uuid(series):
        return pd.api.types.is_string_dtype(series) and series.str.match(pattern).all()

    existing = next((c for c in df.columns if _looks_like_uuid(df[c])), None)
    if existing is not None:
        print(f"Column '{existing}' contains UUID-like values.")
        return df.set_index(existing).reset_index() if index else df

    print("No UUID-like column found. Generating 'uuid' column in the DataFrame.")

    def _fresh():
        return str(uuid.uuid4()).lower()

    if 'uuid' in df.columns:
        # Only fill the gaps; keep any UUIDs the caller already assigned.
        df['uuid'] = df['uuid'].apply(lambda v: v if pd.notnull(v) else _fresh())
    else:
        df['uuid'] = [_fresh() for _ in range(len(df))]

    if index:
        df = df.set_index('uuid').reset_index()

    return df
|
|
197
|
+
|
|
198
|
+
def get_utm_epsg(longitude=None, latitude=None):
    """
    Return the EPSG code (as a string) of the UTM zone for a coordinate.

    Parameters
    ----------
    longitude : float, optional
        Longitude in decimal degrees; determines the 1-60 zone number.
        When omitted, a message is printed and None is returned.
    latitude : float, optional
        Latitude in decimal degrees; determines the hemisphere digit
        (EPSG 326xx = northern, 327xx = southern hemisphere).

    Returns
    -------
    str or None
        e.g. "32637" for UTM zone 37N, or None when no longitude was given.
    """
    if longitude is None:
        print("KeyError : Select column with longitude values")
        return None

    zone_number = int((longitude + 180) / 6) + 1
    # BUG FIX (partial): the hemisphere digit must come from the *latitude*
    # sign, but the original used the longitude. When `latitude` is supplied
    # it is used correctly; when omitted we keep the legacy longitude-based
    # fallback for backward compatibility.
    # NOTE(review): pass `latitude` whenever possible — the fallback is wrong
    # for most real coordinates.
    reference = latitude if latitude is not None else longitude
    hemisphere = '6' if reference >= 0 else '7'  # 6 = northern, 7 = southern
    return f"32{hemisphere}{zone_number:02d}"
|
|
205
|
+
|
|
206
|
+
def to_gdf(df):
    """
    Build a point GeoDataFrame (EPSG:4326) from a "location" column.

    The column may hold either ``[lon, lat]`` lists or mappings with
    "longitude"/"latitude" keys; the first row decides which accessors are
    used for the whole column.
    """
    sample = df["location"].iat[0]
    if isinstance(sample, list):
        lon_key, lat_key = 0, 1
    else:
        lon_key, lat_key = "longitude", "latitude"

    xs = df["location"].str[lon_key]
    ys = df["location"].str[lat_key]
    return gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(xs, ys),
        crs=4326,
    )
|
|
213
|
+
|
|
214
|
+
def convert_time(time_value: float, unit_from: str, unit_to: str) -> float:
    """
    Convert a duration from one time unit to another.

    Args:
        time_value (float): Non-negative duration expressed in `unit_from`.
        unit_from (str): Unit of the input value — any key of `time_chart`,
            plus the microsecond spellings "us", "μs", "microsec", "usec".
        unit_to (str): Unit to convert into (same accepted spellings).

    Returns:
        float: The converted value, rounded to 3 decimal places.

    Raises:
        ValueError: If `time_value` is negative or non-numeric, or a unit
            is not supported.
    """
    if not isinstance(time_value, (int, float)) or time_value < 0:
        raise ValueError("'time_value' must be a non-negative number.")

    # Extra spellings of "microseconds" accepted on top of time_chart's keys.
    micro_aliases = {
        "us": "microseconds",
        "μs": "microseconds",
        "microsec": "microseconds",
        "usec": "microseconds"
    }

    def _canonical(unit: str) -> str:
        unit = unit.lower().strip()
        return micro_aliases.get(unit, unit)

    unit_from = _canonical(unit_from)
    unit_to = _canonical(unit_to)

    if unit_from not in time_chart:
        raise ValueError(f"Invalid 'unit_from': {unit_from}. Supported units: {', '.join(time_chart.keys())}")
    if unit_to not in time_chart:
        raise ValueError(f"Invalid 'unit_to': {unit_to}. Supported units: {', '.join(time_chart.keys())}")

    # Route through seconds: value -> seconds -> target unit.
    in_seconds = time_value * time_chart[unit_from]
    return round(in_seconds / time_chart[unit_to], 3)
|
|
260
|
+
|
|
261
|
+
def convert_speed(speed: float, unit_from: str, unit_to: str) -> float:
    """
    Convert a speed value between km/h, m/s, mph and knot.

    The value is routed through km/h: `speed_chart` gives the km/h
    equivalent of one `unit_from`, and `speed_chart_inverse` converts km/h
    into `unit_to`. The result is rounded to 3 decimal places.

    Raises:
        ValueError: If either unit is not one of the supported spellings.
    """
    from_ok = unit_from in speed_chart_inverse
    to_ok = unit_to in speed_chart
    if not (from_ok and to_ok):
        msg = (
            f"Incorrect 'from_type' or 'to_type' value: {unit_from!r}, {unit_to!r}\n"
            f"Valid values are: {', '.join(speed_chart_inverse)}"
        )
        raise ValueError(msg)

    in_kmh = speed * speed_chart[unit_from]
    return round(in_kmh * speed_chart_inverse[unit_to], 3)
|
|
269
|
+
|
|
270
|
+
def convert_distance(value: float, from_type: str, to_type: str) -> float:
    """
    Converts distance values between different units including metric and imperial.

    Supports:
        Metric: mm, cm, dm, m, dam, hm, km
        Imperial: in, ft, yd, mi

    Handles plural forms, full names, and inconsistent casing.

    Parameters
    ----------
    value : float
        The numeric distance expressed in `from_type`.
    from_type, to_type : str
        Source and target units (symbol, full name, or plural).

    Returns
    -------
    float
        The converted value, rounded to 3 decimal places.

    Raises
    ------
    ValueError
        If either unit cannot be resolved to a supported symbol.
    """

    def _sanitize(unit: str) -> str:
        # Look the spelling up as-is first so the plural entries already in
        # UNIT_SYMBOL ("inches", "feet", ...) are honored.
        # BUG FIX: the previous implementation used str.strip("s"), which
        # turned "inches" into the unknown "inche" (raising despite "inches"
        # being listed in UNIT_SYMBOL) and also stripped a *leading* "s".
        unit = unit.lower().strip()
        if unit in UNIT_SYMBOL:
            return UNIT_SYMBOL[unit]
        if unit in distance_chart:
            return unit
        trimmed = unit.removesuffix("s")  # pluralized symbol, e.g. "kms" -> "km"
        return UNIT_SYMBOL.get(trimmed, trimmed)

    from_sanitized = _sanitize(from_type)
    to_sanitized = _sanitize(to_type)

    valid_units = set(distance_chart.keys())
    if from_sanitized not in valid_units:
        raise ValueError(f"Invalid 'from_type': {from_type!r}. Valid units: {', '.join(valid_units)}")
    if to_sanitized not in valid_units:
        raise ValueError(f"Invalid 'to_type': {to_type!r}. Valid units: {', '.join(valid_units)}")

    # Metric-to-metric conversions shift by powers of ten to avoid float drift.
    if from_sanitized in METRIC_CONVERSION and to_sanitized in METRIC_CONVERSION:
        exponent_diff = METRIC_CONVERSION[from_sanitized] - METRIC_CONVERSION[to_sanitized]
        return round(value * pow(10, exponent_diff), 3)

    # General path: route through meters.
    value_in_meters = value * distance_chart[from_sanitized]
    return round(value_in_meters / distance_chart[to_sanitized], 3)
|