direl-ts-tool-kit 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import numpy as np
1
2
  import pandas as pd
2
3
 
3
4
 
@@ -5,7 +6,7 @@ def parse_datetime_index(df_raw, date_column="date", format=None):
5
6
  """
6
7
  Parses a specified column into datetime objects and sets it as the DataFrame index.
7
8
 
8
- This function prepares raw data for time series analysis by ensuring the
9
+ This function prepares raw data for time series analysis by ensuring the
9
10
  DataFrame is indexed by the correct datetime type.
10
11
 
11
12
  Parameters
@@ -16,15 +17,15 @@ def parse_datetime_index(df_raw, date_column="date", format=None):
16
17
  The name of the column in 'df_raw' that contains the date/time information.
17
18
  Defaults to "date".
18
19
  format : str, optional
19
- The explicit format string (e.g., '%Y%m%d', '%Y-%m-%d %H:%M:%S')
20
- to parse the dates, passed to `pd.to_datetime`. If None (default),
20
+ The explicit format string (e.g., '%Y%m%d', '%Y-%m-%d %H:%M:%S')
21
+ to parse the dates, passed to `pd.to_datetime`. If None (default),
21
22
  Pandas attempts to infer the format automatically.
22
23
 
23
24
  Returns
24
25
  -------
25
26
  df_ts : pd.DataFrame
26
- A copy of the original DataFrame with the specified date column removed
27
- and set as the DatetimeIndex. The returned DataFrame is ready for
27
+ A copy of the original DataFrame with the specified date column removed
28
+ and set as the DatetimeIndex. The returned DataFrame is ready for
28
29
  time series operations.
29
30
  """
30
31
  if not format:
@@ -123,3 +124,38 @@ def reindex_and_aggregate(df_ts, column_name, freq="MS"):
123
124
  df_ts_new.notnull().apply(pd.Series.value_counts)
124
125
 
125
126
  return df_ts_new
127
+
128
+
129
def remove_outliers_by_threshold(df_ts, column_name, lower_bound, upper_bound):
    """
    Replace out-of-range values in one column with NaN (outlier removal).

    A value is treated as an outlier when it is strictly below
    ``lower_bound`` or strictly above ``upper_bound``; values equal to
    either bound are kept. Existing NaN values compare as False against
    both bounds and therefore stay NaN.

    Parameters
    ----------
    df_ts : pd.DataFrame
        The time series DataFrame. It is not modified; a copy is returned.
    column_name : str
        The name of the column where outlier detection will be performed
        (e.g., 'Temperature').
    lower_bound : float or int
        The minimum acceptable value. Values strictly below this bound are
        replaced by NaN.
    upper_bound : float or int
        The maximum acceptable value. Values strictly above this bound are
        replaced by NaN.

    Returns
    -------
    pd.DataFrame
        A copy of ``df_ts`` in which the outliers of ``column_name`` are NaN.
        All other columns, and the index, are unchanged.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df_ts``.
    """
    df_out = df_ts.copy()

    values = df_out[column_name]
    is_outlier = (values < lower_bound) | (values > upper_bound)

    # Mask by row position rather than by index label: selecting through
    # ``.loc[<labels of outlier rows>]`` would also blank out every valid
    # row that merely shares a (duplicated) timestamp with an outlier.
    # ``Series.mask`` fills with NaN by default and upcasts integer columns
    # without relying on in-place assignment of an incompatible dtype.
    df_out[column_name] = values.mask(is_outlier)

    return df_out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: direl-ts-tool-kit
3
- Version: 0.3.2
3
+ Version: 0.4.1
4
4
  Summary: A toolbox for time series analysis and visualization.
5
5
  Home-page: https://gitlab.com/direl/direl_tool_kit
6
6
  Author: Diego Restrepo-Leal
@@ -3,9 +3,9 @@ direl_ts_tool_kit/plot/__init__.py,sha256=CMwyv-kiE74nwr3MJPL7gWIJmcfZ8UQCRu7mBG
3
3
  direl_ts_tool_kit/plot/plot_style.py,sha256=vhzcDa3LzgkHuy-GnliofGZ8TDntkm3_1C5kgl2Gx3E,1010
4
4
  direl_ts_tool_kit/plot/plot_ts.py,sha256=OPmdaXGUv_oNaXQ4epiPX1fKTtxyaWuoGfHPa5if96U,5008
5
5
  direl_ts_tool_kit/utilities/__init__.py,sha256=jMtxYZUtwlhgI99sxe_8MMzsDnxtbTP7Ivh9tUOeIwQ,25
6
- direl_ts_tool_kit/utilities/data_prep.py,sha256=IZJShsSGWxv6Q-rM7m69kGADNUhCzs-KfhphgqD29Ok,4703
7
- direl_ts_tool_kit-0.3.2.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- direl_ts_tool_kit-0.3.2.dist-info/METADATA,sha256=n0O3607YFBQYXu6eu27dutF5o_T2xC11JyUucb8Oafs,950
9
- direl_ts_tool_kit-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
- direl_ts_tool_kit-0.3.2.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
11
- direl_ts_tool_kit-0.3.2.dist-info/RECORD,,
6
+ direl_ts_tool_kit/utilities/data_prep.py,sha256=rEBRr4SDc2oAlpHB3_cjjn-u0JSAwhzVxNEQ7pQCRc4,5900
7
+ direl_ts_tool_kit-0.4.1.dist-info/licenses/LICENCE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ direl_ts_tool_kit-0.4.1.dist-info/METADATA,sha256=GVSSUyKpZhCFEfc8oFm9vlVI7FVKtfEbBE-2UyE41TU,950
9
+ direl_ts_tool_kit-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ direl_ts_tool_kit-0.4.1.dist-info/top_level.txt,sha256=vMCRudnGnsdRg_6fUftnG8PF2Y1m0bjBDMf3pCAp6bc,18
11
+ direl_ts_tool_kit-0.4.1.dist-info/RECORD,,