direl-ts-tool-kit 0.3.2__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/PKG-INFO +1 -1
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/utilities/data_prep.py +41 -5
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/PKG-INFO +1 -1
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/setup.py +1 -1
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/LICENCE +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/README.md +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/__init__.py +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/plot/__init__.py +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/plot/plot_style.py +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/plot/plot_ts.py +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/utilities/__init__.py +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/SOURCES.txt +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/dependency_links.txt +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/requires.txt +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/top_level.txt +0 -0
- {direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/setup.cfg +0 -0
{direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit/utilities/data_prep.py
RENAMED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import numpy as np
|
|
1
2
|
import pandas as pd
|
|
2
3
|
|
|
3
4
|
|
|
@@ -5,7 +6,7 @@ def parse_datetime_index(df_raw, date_column="date", format=None):
|
|
|
5
6
|
"""
|
|
6
7
|
Parses a specified column into datetime objects and sets it as the DataFrame index.
|
|
7
8
|
|
|
8
|
-
This function prepares raw data for time series analysis by ensuring the
|
|
9
|
+
This function prepares raw data for time series analysis by ensuring the
|
|
9
10
|
DataFrame is indexed by the correct datetime type.
|
|
10
11
|
|
|
11
12
|
Parameters
|
|
@@ -16,15 +17,15 @@ def parse_datetime_index(df_raw, date_column="date", format=None):
|
|
|
16
17
|
The name of the column in 'df_raw' that contains the date/time information.
|
|
17
18
|
Defaults to "date".
|
|
18
19
|
format : str, optional
|
|
19
|
-
The explicit format string (e.g., '%Y%m%d', '%Y-%m-%d %H:%M:%S')
|
|
20
|
-
to parse the dates, passed to `pd.to_datetime`. If None (default),
|
|
20
|
+
The explicit format string (e.g., '%Y%m%d', '%Y-%m-%d %H:%M:%S')
|
|
21
|
+
to parse the dates, passed to `pd.to_datetime`. If None (default),
|
|
21
22
|
Pandas attempts to infer the format automatically.
|
|
22
23
|
|
|
23
24
|
Returns
|
|
24
25
|
-------
|
|
25
26
|
df_ts : pd.DataFrame
|
|
26
|
-
A copy of the original DataFrame with the specified date column removed
|
|
27
|
-
and set as the DatetimeIndex. The returned DataFrame is ready for
|
|
27
|
+
A copy of the original DataFrame with the specified date column removed
|
|
28
|
+
and set as the DatetimeIndex. The returned DataFrame is ready for
|
|
28
29
|
time series operations.
|
|
29
30
|
"""
|
|
30
31
|
if not format:
|
|
@@ -123,3 +124,38 @@ def reindex_and_aggregate(df_ts, column_name, freq="MS"):
|
|
|
123
124
|
df_ts_new.notnull().apply(pd.Series.value_counts)
|
|
124
125
|
|
|
125
126
|
return df_ts_new
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def remove_outliers_by_threshold(df_ts, column_name, lower_bound, upper_bound):
|
|
130
|
+
"""
|
|
131
|
+
Replaces values in a specified column with NaN if they fall outside
|
|
132
|
+
a defined range (outlier removal).
|
|
133
|
+
|
|
134
|
+
This function identifies data points that are either below the lower
|
|
135
|
+
bound or above the upper bound and treats them as missing data.
|
|
136
|
+
|
|
137
|
+
Parameters
|
|
138
|
+
----------
|
|
139
|
+
df_ts : pd.DataFrame
|
|
140
|
+
The time series DataFrame (must have a DatetimeIndex).
|
|
141
|
+
column_name : str
|
|
142
|
+
The name of the column where outlier detection will be performed (e.g., 'Temperature').
|
|
143
|
+
lower_bound : float or int
|
|
144
|
+
The minimum acceptable value. Values strictly below this bound are replaced by NaN.
|
|
145
|
+
upper_bound : float or int
|
|
146
|
+
The maximum acceptable value. Values strictly above this bound are replaced by NaN.
|
|
147
|
+
|
|
148
|
+
Returns
|
|
149
|
+
-------
|
|
150
|
+
pd.DataFrame
|
|
151
|
+
The DataFrame with outlier values in the specified column replaced by np.nan.
|
|
152
|
+
"""
|
|
153
|
+
df_out = df_ts.copy()
|
|
154
|
+
|
|
155
|
+
outlier_index = df_out[
|
|
156
|
+
(df_out[column_name] < lower_bound) | (df_out[column_name] > upper_bound)
|
|
157
|
+
].index
|
|
158
|
+
|
|
159
|
+
df_out.loc[outlier_index, column_name] = np.nan
|
|
160
|
+
|
|
161
|
+
return df_out
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="direl-ts-tool-kit",
|
|
5
|
-
version="0.3.2",
|
|
5
|
+
version="0.4.1",
|
|
6
6
|
description="A toolbox for time series analysis and visualization.",
|
|
7
7
|
long_description=open("README.md", encoding="utf-8").read(),
|
|
8
8
|
long_description_content_type="text/markdown",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
{direl_ts_tool_kit-0.3.2 → direl_ts_tool_kit-0.4.1}/direl_ts_tool_kit.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|