sarapy 2.2.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarapy/analysis/FeaturesResume.py +722 -0
- sarapy/analysis/__init__.py +3 -0
- sarapy/dataProcessing/OpsProcessor.py +68 -33
- sarapy/dataProcessing/TLMSensorDataProcessor.py +5 -2
- sarapy/mlProcessors/FertilizerTransformer.py +7 -5
- sarapy/mlProcessors/PlantinClassifier.py +120 -31
- sarapy/mlProcessors/PlantinFMCreator.py +25 -12
- sarapy/mlProcessors/__init__.py +11 -0
- sarapy/preprocessing/TransformInputData.py +3 -2
- sarapy/preprocessing/__init__.py +11 -2
- sarapy/stats/__init__.py +13 -1
- sarapy/stats/stats.py +5 -6
- sarapy/utils/__init__.py +3 -0
- sarapy/utils/utils.py +172 -0
- sarapy/version.py +2 -2
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/METADATA +39 -1
- sarapy-3.0.0.dist-info/RECORD +29 -0
- sarapy/utils/amg_decoder.py +0 -125
- sarapy/utils/amg_ppk.py +0 -38
- sarapy/utils/getRawOperations.py +0 -20
- sarapy-2.2.0.dist-info/RECORD +0 -29
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/LICENCE +0 -0
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/WHEEL +0 -0
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/top_level.txt +0 -0
sarapy/analysis/FeaturesResume.py (new file)

@@ -0,0 +1,722 @@
import logging
logger = logging.getLogger(__name__)  # ← "sarapy.stats"
logging.getLogger("matplotlib.font_manager").setLevel(logging.WARNING)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.gridspec as gridspec
from matplotlib.ticker import ScalarFormatter
import seaborn as sns
from sarapy.mlProcessors import PlantinClassifier
from sarapy.preprocessing import TransformInputData
from sarapy.mlProcessors import PlantinFMCreator
from sarapy.stats import *
from sarapy.dataProcessing import OpsProcessor
import re
from datetime import datetime, time

class FeaturesResume():
    def __init__(self, raw_data, info="", filtrar=None, updateTagSeedling=False, outliers=None,
                 kwargs_fmcreator=None, kwargs_classifier=None, timeFilter=None, window_size_ma=104):
        """
        Constructor that initializes the FeaturesResume class.

        Args:
            - info (str): Information about the node or nodes, date, and anything else of interest.
        """
        self.raw_data = raw_data
        self.updateTagSeedling = updateTagSeedling
        self.filtrar = filtrar
        self.timeFilter = timeFilter
        self.outliers = outliers
        self.window_size_ma = window_size_ma

        self.info = info
        if not kwargs_fmcreator:
            self.kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
                                     "dist_mismo_lugar":0.2, "max_dist":100,
                                     "umbral_ratio_dCdP":2, "deltaO_medio":4,
                                     "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.8,
                                     "deltaO_ma": True, "deltaO_ma_window": 26}
        else:
            self.kwargs_fmcreator = kwargs_fmcreator

        if not kwargs_classifier:
            self.kwargs_classifier = {"proba_threshold":0.2,
                                      "use_proba_ma":False,
                                      "proba_ma_window":10,
                                      "update_samePlace":True,
                                      "update_dstpt":True,
                                      "useRatioStats":False,
                                      "std_weight":1.,
                                      "useDistancesStats":False,
                                      "ratio_dcdp_umbral":0.0,
                                      "dist_umbral":0.5,
                                      "umbral_bajo_dstpt":4,
                                      "umbral_proba_dstpt":0.70,
                                      "use_ma":True,
                                      "dstpt_ma_window":104,
                                      "use_min_dstpt":False,
                                      "factor":0.1}
        else:
            self.kwargs_classifier = kwargs_classifier

        if timeFilter:
            self.raw_data = self.filter_raw_by_time_window(**timeFilter)

        self.plantinFMCreator = PlantinFMCreator(**self.kwargs_fmcreator)
        self.tid = TransformInputData()
        self.data = self.transformRawData(self.raw_data)

        if self.filtrar == 1:
            self.data = self.data[self.data["tag_seedling"] == 1]
        elif self.filtrar == 0:
            self.data = self.data[self.data["tag_seedling"] == 0]

        if "dst_pt" in self.data.columns:
            if len(self.data["dst_pt"]) < window_size_ma:
                self.data["dst_pt_ma"] = self.getSensorMA(window_size=len(self.data["dst_pt"]))
            else:
                self.data["dst_pt_ma"] = self.getSensorMA(window_size=window_size_ma)

        if "tag_seed_probas1" in self.data.columns:
            if len(self.data["tag_seed_probas1"]) < window_size_ma:
                self.data["tag_seed_probas1_ma"] = self.getProbasMA(window_size=len(self.data["tag_seed_probas1"]))
            else:
                self.data["tag_seed_probas1_ma"] = self.getProbasMA(window_size=window_size_ma)

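Note that kwargs_fmcreator and kwargs_classifier replace the built-in defaults wholesale rather than being merged with them, so a caller who only wants to change one or two values still has to supply a complete dict. A minimal sketch of one way to handle that (the override values and the raw_data variable are illustrative; see the __main__ block at the end of the file for a full construction):

# Illustrative only: start from a copy of the defaults shown above and override selected keys.
default_classifier_kwargs = {"proba_threshold": 0.2, "use_proba_ma": False, "proba_ma_window": 10,
                             "update_samePlace": True, "update_dstpt": True, "useRatioStats": False,
                             "std_weight": 1., "useDistancesStats": False, "ratio_dcdp_umbral": 0.0,
                             "dist_umbral": 0.5, "umbral_bajo_dstpt": 4, "umbral_proba_dstpt": 0.70,
                             "use_ma": True, "dstpt_ma_window": 104, "use_min_dstpt": False, "factor": 0.1}
my_classifier_kwargs = {**default_classifier_kwargs, "proba_threshold": 0.45, "dstpt_ma_window": 62}

fr = FeaturesResume(raw_data, info="UPM039N", kwargs_classifier=my_classifier_kwargs)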
    def transformRawData(self, raw_data):
        """
        Pre-processes the information and returns a DataFrame with the features that are used.

        Features to take:
            N_MODE

        Returns:
            DataFrame with the following features:
            - nodo
            - tag_seedling
            - tag_seedling_probas
            - raw_tag_seedling
            - tag_fertilizer
            - raw_tag_fertilizer
            - deltaO
            - ratio_dCdP
            - distances
            - precision: from the GPS
            - dst_pt
            - inest_pt
            - latitud
            - longitud
        """

        samples = self.tid.transform(raw_data)  # transform the data
        temp_rawdatadf = pd.DataFrame(raw_data)
        temp_samplesdf = pd.DataFrame(samples)
        temporal_features, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(samples)
        columns = ['nodo',
                   'tag_seedling',
                   'tag_seed_probas1',
                   'tag_seed_probas0',
                   'raw_tag_seedling',
                   'tag_fertilizer',
                   'raw_tag_fertilizer',
                   'deltaO',
                   'ratio_dCdP',
                   'time_ac',
                   'distances',
                   'precision',
                   'dst_pt',
                   'inest_pt',
                   'latitud',
                   'longitud',
                   ]

        # build the DataFrame
        data = pd.DataFrame(columns=columns)
        data["nodo"] = temp_rawdatadf["nodo"]
        tags_seed_updated, probas = self.classifiedData(**self.kwargs_classifier)
        if self.updateTagSeedling:
            data["tag_seedling"] = tags_seed_updated
        else:
            data["tag_seedling"] = temp_rawdatadf["tag_seedling"]
        data["tag_seed_probas1"] = probas[:,1]
        data["tag_seed_probas0"] = probas[:,0]
        data["raw_tag_seedling"] = temp_rawdatadf["raw_tag_seedling"]
        data["tag_fertilizer"] = temp_rawdatadf["tag_fertilizer"]
        data["raw_tag_fertilizer"] = temp_rawdatadf["raw_tag_fertilizer"]
        data["deltaO"] = temporal_features[:,0]
        data["ratio_dCdP"] = temporal_features[:,1]
        data["time_ac"] = temp_samplesdf["TIME_AC"]
        data["distances"] = temporal_features[:,2]
        data["precision"] = temp_samplesdf["precision"]
        data["dst_pt"] = dst_pt
        data["inest_pt"] = inest_pt
        data["latitud"] = temp_samplesdf["latitud"]
        data["longitud"] = temp_samplesdf["longitud"]

        if self.outliers:
            data = self.removeOutliers(data.copy(), self.outliers)

        return data

    def classifiedData(self, classifier_file='modelos\\pipeline_rf.pkl', **kwargs_classifier):

        raw_X = self.tid.transform(self.raw_data)
        X, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(raw_X)

        # ratio_dcdp_median = np.median(X[:, 1])
        ## replace the values of X[:, 1] with the median when they fall below -10
        # X[:, 1] = np.where(X[:, 1] < -0.8, ratio_dcdp_median, X[:, 1])
        # X[:, 0] = self.getMA(X[:, 0], window_size=26)

        clasificador = PlantinClassifier(classifier_file=classifier_file)

        clasificaciones, probas = clasificador.classify(X, dst_pt, inest_pt, **kwargs_classifier)

        return clasificaciones, probas

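classifiedData returns the per-operation labels together with an (n_ops, 2) probability array; transformRawData stores its columns as tag_seed_probas0 and tag_seed_probas1. A short sketch, assuming fr is an already-built FeaturesResume and that the default classifier_file path resolves, of re-thresholding those probabilities without rebuilding the object:

import numpy as np

labels, probas = fr.classifiedData(**fr.kwargs_classifier)

# probas[:, 1] is what transformRawData stores as tag_seed_probas1;
# re-threshold it manually, e.g. at 0.6 (illustrative value).
custom_tags = (probas[:, 1] >= 0.6).astype(int)
print(np.bincount(custom_tags, minlength=2))  # counts of 0s and 1s under the new threshold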
    def removeOutliers(self, data, limits: dict = {"deltaO": (0, 3600),
                                                   "precision": (0, 10000)}):
        """
        Function to remove outliers from the processed features.
        """

        ## check which columns are actually present in self.data and in limits.
        ## the ones that are not present are ignored and a warning message is shown
        ## update the columns inside limits, removing the ones that are not in self.data

        for col in list(limits.keys()):
            if col not in data.columns:
                logger.warning(f"Column {col} is not in the data and will be ignored.")
                del limits[col]

        ## remove outliers
        for col, (lower, upper) in limits.items():
            data = data[(data[col] >= lower) & (data[col] <= upper)]

        return data

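removeOutliers deletes keys from the limits dict it receives when a column is missing, and its default limits argument is a shared mutable dict, so it is safest to pass a fresh copy. A small sketch of standalone use on an existing fr.data (the bounds are illustrative):

limits = {"deltaO": (0, 3600), "precision": (0, 5000)}  # illustrative bounds
# pass copies so neither fr.data nor the caller's limits dict is mutated
clean = fr.removeOutliers(fr.data.copy(), dict(limits))
print(len(fr.data), "->", len(clean), "rows after outlier filtering")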
    def getResume(self, to="all", pctbajo_value=1, pctalto_value=14, lista_funciones=None):
        """
        Method to obtain a summary of the processed features.
        The following statistics are computed for every feature:
        - count
        - over_total
        - mean
        - median
        - standard deviation (std)
        - min
        - max
        - skew
        - kurtosis

        Additionally, for the plantin distortion (dst_pt) the pct_bajo and pct_alto rows are added.

        They are computed for all the data and for tag_seedling = 1 and tag_seedling = 0.

        A pivot-style table is returned using the indexes = ["all", "1s", "0s"]
        """
        if not lista_funciones:
            lista_funciones = ["count", "mean", "median", "std", "min", "max", "skew", "kurt"]
        data_wo_node = self.data.copy()
        data_wo_node = data_wo_node.drop(columns=["nodo"])
        num_cols = data_wo_node.select_dtypes(include="number").columns

        if to == 1:
            data_wo_node = data_wo_node[data_wo_node["tag_seedling"] == 1]
        elif to == 0:
            data_wo_node = data_wo_node[data_wo_node["tag_seedling"] == 0]

        stats = data_wo_node[num_cols].agg(lista_funciones)

        operaciones = len(self.data)
        over_val = (len(data_wo_node) / operaciones) if operaciones > 0 else np.nan
        over = pd.Series(over_val, index=stats.columns, name="over_total")

        arriba = stats.loc[["count"]]
        abajo = stats.drop(index=["count"])
        stats = pd.concat([arriba, over.to_frame().T, abajo], axis=0)

        if "dst_pt" in data_wo_node.columns:
            pct_bajo = float(np.mean(data_wo_node["dst_pt"] < pctbajo_value))
            pct_alto = float(np.mean(data_wo_node["dst_pt"] > pctalto_value))
            # Insert/update those rows in the dst_pt column.
            stats.loc["pct_bajo", "dst_pt"] = pct_bajo
            stats.loc["pct_alto", "dst_pt"] = pct_alto

        ## replace NaN values with "not apply"
        stats = stats.fillna("not apply")

        return stats

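getResume builds one statistics table per subset (to="all", to=1 or to=0). A sketch of lining the three views up for comparison, assuming fr already holds processed data:

import pandas as pd

resumes = {label: fr.getResume(to=sel)
           for label, sel in [("all", "all"), ("1s", 1), ("0s", 0)]}

# stack the three tables under an outer index, mirroring the ["all", "1s", "0s"] idea in the docstring
comparison = pd.concat(resumes, names=["subset", "stat"])
print(comparison.loc["all"])                # statistics over every operation
print(comparison.xs("mean", level="stat"))  # mean of each feature for each subset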
    def getSensorMA(self, window_size=104, mode='same'):
        """
        Function to compute the moving average of a time series.
        data: numpy array with the time-series data
        window_size: window size used to compute the moving average
        """
        # return np.convolve(self.data["dst_pt"].values, np.ones(window_size)/window_size, mode=mode)
        ## to avoid zeros at the start and end caused by the convolution, add padding
        ## put the first window_size values of the signal at the start and the last window_size values at the end
        padding_start = self.data["dst_pt"].values[0:window_size]
        padding_end = self.data["dst_pt"].values[-window_size:]
        padded_data = np.concatenate([padding_start, self.data["dst_pt"].values, padding_end])
        ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
        return ma_full[window_size: -window_size]

    def getProbasMA(self, window_size=104, mode='same'):
        """
        Function to compute the moving average of a time series.
        data: numpy array with the time-series data
        window_size: window size used to compute the moving average
        """
        ## to avoid zeros at the start and end caused by the convolution, add padding
        ## copy the first and last values, using the same amount as window_size
        ## put the first window_size values of the signal at the start and the last window_size values at the end
        padding_start = self.data["tag_seed_probas1"].values[0:window_size]
        padding_end = self.data["tag_seed_probas1"].values[-window_size:]
        padded_data = np.concatenate([padding_start, self.data["tag_seed_probas1"].values, padding_end])
        ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
        return ma_full[window_size: -window_size]

    def getMA(self, data: np.array, window_size=104, mode='same'):
        """
        Function to compute the moving average of a time series.
        data: numpy array with the time-series data
        window_size: window size used to compute the moving average
        """
        ## to avoid zeros at the start and end caused by the convolution, add padding
        ## copy the first and last values, using the same amount as window_size
        ## put the first window_size values of the signal at the start and the last window_size values at the end
        padding_start = data[0:window_size]
        padding_end = data[-window_size:]
        padded_data = np.concatenate([padding_start, data, padding_end])
        ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
        return ma_full[window_size: -window_size]

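The three moving-average helpers above share the same scheme: the signal is padded with copies of its first and last window_size samples, smoothed with a length-window_size box kernel via np.convolve(..., mode='same'), and the padded ends are sliced off, so the result keeps the input length and the edges are averaged against repeated data rather than zeros. A self-contained sketch of the same computation on a toy array:

import numpy as np

def box_ma(x, window_size):
    # same padding idea as getMA: repeat the first/last window_size samples at the edges
    padded = np.concatenate([x[:window_size], x, x[-window_size:]])
    ma = np.convolve(padded, np.ones(window_size) / window_size, mode='same')
    return ma[window_size:-window_size]

x = np.arange(10, dtype=float)
print(box_ma(x, 4))                 # 10 values, edges biased toward the repeated boundary samples
print(len(box_ma(x, 4)) == len(x))  # True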
    def to_time_obj(self, t):
        """
        Accepts 'HH:MM[:SS]' in 24h format or 'h:MM[:SS] a.m./p.m.' (with or without periods/spaces) and returns a datetime.time.
        """
        if isinstance(t, time):
            return t
        s = str(t).strip().lower()
        # normalize variants such as 'a.m.', 'a. m.', etc. → 'am'/'pm'
        s = re.sub(r'\s+', '', s)  # remove spaces
        s = s.replace('.', '')  # remove periods
        s = s.replace('a m', 'am').replace('p m', 'pm')  # in case any remain
        # 12h with am/pm
        if 'am' in s or 'pm' in s:
            for fmt in ('%I:%M:%S%p', '%I:%M%p'):
                try: return datetime.strptime(s.upper(), fmt).time()
                except ValueError: pass
            raise ValueError(f"Could not parse the 12h time: {t!r}")
        # 24h
        for fmt in ('%H:%M:%S', '%H:%M'):
            try: return datetime.strptime(t, fmt).time()
            except ValueError: pass
        raise ValueError(f"Could not parse the 24h time: {t!r}")

    def time_to_td(self, t: time) -> pd.Timedelta:
        return pd.Timedelta(hours=t.hour, minutes=t.minute, seconds=t.second, microseconds=t.microsecond)

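to_time_obj accepts either 24-hour strings or 12-hour strings with a.m./p.m. decorations (spaces and periods are stripped first), and time_to_td converts the result into a Timedelta measured from midnight so it can be compared against timestamp offsets. A short sketch, assuming fr is a FeaturesResume instance:

from datetime import time

print(fr.to_time_obj("13:43:19"))       # -> datetime.time(13, 43, 19)
print(fr.to_time_obj("1:43 p.m."))      # -> datetime.time(13, 43)
print(fr.to_time_obj(time(9, 30)))      # time objects pass through unchanged
print(fr.time_to_td(time(13, 43, 19)))  # -> Timedelta('0 days 13:43:19')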
    def filter_raw_by_time_window(self,
                                  start_time, end_time,
                                  tz_target: str = "America/Montevideo",
                                  timestamp_key: str = "timestamp",
                                  inclusive: str = "both",  # 'both' | 'neither' | 'left' | 'right'
                                  inplace=False):
        """
        Filters records whose 'timestamp' falls within [start_time, end_time] in the 'tz_target' time zone.
        - start_time/end_time: 'HH:MM[:SS]' 24h or 'h:MM[:SS] a.m./p.m.' or datetime.time
        - Supports windows that cross midnight (e.g. 23:30 to 01:15).
        Returns the same structure: a list of dicts if raw_data was a list; a DataFrame if it was a DataFrame.
        """
        df = pd.DataFrame(self.raw_data) if not isinstance(self.raw_data, pd.DataFrame) else self.raw_data.copy()
        if timestamp_key not in df.columns:
            raise KeyError(f"Column {timestamp_key!r} not found in the data.")

        # 1) Parse and convert the time zone
        ts_utc = pd.to_datetime(df[timestamp_key], utc=True, errors='coerce')
        if ts_utc.isna().any():
            n_bad = int(ts_utc.isna().sum())
            raise ValueError(f"There are {n_bad} invalid/unparseable timestamps.")
        ts_local = ts_utc.dt.tz_convert(tz_target)

        # 2) Time of day as a Timedelta from local midnight
        tod = ts_local - ts_local.dt.normalize()

        # 3) Target window → Timedelta
        t0 = self.time_to_td(self.to_time_obj(start_time))
        t1 = self.time_to_td(self.to_time_obj(end_time))

        # 4) Build the mask (handles midnight crossing)
        if t0 <= t1:
            mask = tod.between(t0, t1, inclusive=inclusive)
        else:
            # example: 23:30 → 01:15 (two segments)
            mask = tod.ge(t0) | tod.le(t1)
            if inclusive in ("neither", "right"):  # adjust the endpoints if not inclusive
                mask &= ~tod.eq(t0)
            if inclusive in ("neither", "left"):
                mask &= ~tod.eq(t1)

        filtered = df[mask]
        # keep the indexes where df[mask] holds and apply them to the original self.raw_data

        ## check that filtered is not empty, otherwise return None
        if filtered.empty or len(filtered) < 10:
            logger.warning("The time filter produced an empty result set.")
            print("The time filter produced an empty result set.")
            return None

        # if inplace, return the filtered raw records; otherwise return a new FeaturesResume object built from them
        if inplace:
            return filtered.to_dict(orient='records') if not isinstance(self.raw_data, pd.DataFrame) else filtered
        else:
            # copy the state of the current object
            new_fr = FeaturesResume(
                raw_data=filtered.to_dict(orient='records') if not isinstance(self.raw_data, pd.DataFrame) else filtered,
                info=self.info,
                filtrar=self.filtrar,
                updateTagSeedling=self.updateTagSeedling,
                kwargs_fmcreator=self.kwargs_fmcreator,
                kwargs_classifier=self.kwargs_classifier,
                timeFilter=None,  # the filter has already been applied
                outliers=self.outliers,
                window_size_ma=self.window_size_ma,
            )

            return new_fr

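Because the comparison is made on the time of day (a Timedelta from local midnight), a window whose start is later than its end is treated as crossing midnight. A sketch of both cases, again assuming fr is an existing FeaturesResume; with the default inplace=False a new FeaturesResume is returned, or None when fewer than 10 records match:

# daytime window: returns a new FeaturesResume built only from the matching records
fr_day = fr.filter_raw_by_time_window("13:29:13", "13:43:19",
                                      tz_target="America/Montevideo")

# window that crosses midnight (23:30 -> 01:15): records from both segments are kept
fr_night = fr.filter_raw_by_time_window("23:30", "01:15",
                                        tz_target="America/Montevideo")

# inplace=True returns only the filtered raw records instead of a new object
raw_subset = fr.filter_raw_by_time_window("13:29:13", "13:43:19", inplace=True)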
    def _get_ratiodCdPPlot(self, figsize=(10, 6), show=False):
        """
        Function to return (and optionally display) a line plot of
        ratio_dCdP and tag_seedling. The left Y axis is the ratio and the right one is tag_seedling.
        """
        # Check that the required columns exist
        if "ratio_dCdP" not in self.data.columns or "tag_seedling" not in self.data.columns:
            raise ValueError("Required columns for plotting are missing.")

        fig, ax1 = plt.subplots(figsize=figsize)

        # Left axis: ratio_dCdP
        ax1.plot(self.data["ratio_dCdP"], label='ratio_dCdP', color='blue')
        ax1.set_xlabel("Operation")
        ax1.set_ylabel("Ratio dCdP", color='blue')
        ax1.tick_params(axis='y', labelcolor='blue')

        # Force the Y axis into decimal format
        ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=False))
        ax1.ticklabel_format(style='plain', axis='y')  # ensures decimal format

        # Right axis: tag_seedling
        ax2 = ax1.twinx()
        ax2.plot(self.data["tag_seedling"], label='tag_seedling', color='red')
        ax2.set_ylabel("Tag Seedling", color='red')
        ax2.tick_params(axis='y', labelcolor='red')
        ax2.set_ylim(0, 5)  # limit the tag_seedling Y axis to between 0 and 5

        plt.title(f"Analysis of {self.info} - Ratio dCdP and Tag Seedling")
        fig.tight_layout()

        if show:
            plt.show()

        return fig

    def plotFeatureComparison(
        self,
        feature1: str,
        feature2: str,
        y1limits=None,
        y2limits=None,
        figsize=(10, 6),
        title=None,
        show=False,
        save=False,
        filename=None,
        colors=('blue', 'red'),
        *,
        line1: bool = True,          # draw a line on ax1?
        line2: bool = True,          # draw a line on ax2?
        marker1: str | None = None,  # e.g. 'o', 's', '^' for ax1
        marker2: str | None = None,  # e.g. 'o', 's', '^' for ax2
        markersize: float = 6
    ):
        """
        Generates a comparison plot of two features on different y axes.
        Each axis can use a line, markers only, or both.

        Args:
            - feature1, feature2: column names in self.data.
            - y1limits, y2limits: optional (ymin, ymax) tuples.
            - figsize: figure size.
            - show: whether to display the figure.
            - line1, line2: True = draw a line; False = markers only (if a marker is given).
            - marker1, marker2: marker symbols (e.g. 'o'); None = no marker.
            - markersize: marker size.
        """

        # check that the features are present in the data
        if feature1 not in self.data.columns or feature2 not in self.data.columns:
            raise ValueError("Required columns for plotting are missing.")

        fig, ax1 = plt.subplots(figsize=figsize)

        # ---- Left axis: feature1
        ls1 = '-' if line1 else 'None'  # 'None' avoids drawing a line
        ax1.plot(
            self.data.index,
            self.data[feature1].values,
            label=feature1,
            color=colors[0],
            linestyle=ls1,
            marker=marker1,
            markersize=markersize
        )
        ax1.set_xlabel("Operation")
        ax1.set_ylabel(feature1, color=colors[0])
        ax1.tick_params(axis='y', labelcolor=colors[0])

        # Decimal format and optional limits
        ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=False))
        ax1.ticklabel_format(style='plain', axis='y')
        if y1limits is not None:
            ax1.set_ylim(y1limits)

        # ---- Right axis: feature2
        ax2 = ax1.twinx()
        ls2 = '-' if line2 else 'None'
        ax2.plot(
            self.data.index,
            self.data[feature2].values,
            label=feature2,
            color=colors[1],
            linestyle=ls2,
            marker=marker2,
            markersize=markersize
        )
        ax2.set_ylabel(feature2, color=colors[1])
        ax2.tick_params(axis='y', labelcolor=colors[1])
        if y2limits is not None:
            ax2.set_ylim(y2limits)

        # Title and layout
        if title is not None:
            plt.title(title)
        else:
            plt.title(f"Analysis of {self.info} - {feature1} and {feature2}")
        fig.tight_layout()

        # Combined legend for both axes
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')

        if save:
            if filename is not None:
                plt.savefig(filename)
            else:
                plt.savefig(f"feature_comparison_{feature1}_{feature2}.png")

        if show:
            plt.show()
        else:
            plt.close(fig)  # close the figure to free memory

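A sketch of the markers-only mode described in the docstring, assuming fr.data already contains the dst_pt_ma and tag_seed_probas1_ma columns added by the constructor (the file name and limits are illustrative):

# line for the smoothed sensor signal, markers only for the smoothed probabilities
fr.plotFeatureComparison("dst_pt_ma", "tag_seed_probas1_ma",
                         y2limits=(0, 1),
                         line2=False, marker2='o', markersize=3,
                         title="dst_pt_ma vs. smoothed seedling probability",
                         save=True, filename="dstpt_vs_probas.png",
                         show=False)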
    ## scatter plot to compare the distribution of 0s and 1s
    def plot_geo_compare(
        self,
        feature_col: str,
        lat_col: str = "latitud",
        lon_col: str = "longitud",
        tag_col: str = "tag_seedling",
        cmap: str = "winter",
        figsize=(14, 6),
        s: float = 10.0,
        alpha: float = 0.8,
        equal_aspect: bool = True,
        save=False,
        show=True,
        filename=None,
        # ---- NEW: colorbar control and color limits ----
        vmin: float | None = None,
        vmax: float | None = None,
        cb_width: float = 0.02,       # relative colorbar width (fraction of the map axis)
        cb_pad: float = 0.02,         # spacing between the map and the colorbar (as a fraction)
        cb_ticks: int | None = None,  # approximate number of ticks (None = automatic)
    ):
        # -------- Validation --------
        df = self.data
        required_cols = {lat_col, lon_col, tag_col, feature_col}
        missing = [c for c in required_cols if c not in df.columns]
        if missing:
            raise ValueError(f"Missing columns in the DataFrame: {missing}")

        # Data and NaN-free masks
        left = df[[lat_col, lon_col, tag_col]].dropna()
        right = df[[lat_col, lon_col, feature_col]].dropna()

        # -------- Main figure (2 subplots, no colorbar yet) --------
        fig, (ax0, ax1) = plt.subplots(1, 2, figsize=figsize, constrained_layout=False)
        plt.subplots_adjust(wspace=0.25, left=0.06, right=0.94, bottom=0.10, top=0.90)

        # -------- Left subplot: binary red/green --------
        color_map = {1: "green", 0: "red"}
        colors_left = left[tag_col].map(color_map).fillna("gray").values
        ax0.scatter(left[lon_col], left[lat_col], c=colors_left, s=s, alpha=alpha, linewidths=0)
        ax0.set_xlabel("Longitude"); ax0.set_ylabel("Latitude")
        ax0.set_title("Seedlings (green=1, red=0)")
        from matplotlib.lines import Line2D
        leg = [
            Line2D([0], [0], marker='o', color='w', label=f"{tag_col} = 1", markerfacecolor='green', markersize=8),
            Line2D([0], [0], marker='o', color='w', label=f"{tag_col} = 0", markerfacecolor='red', markersize=8),
        ]
        if (~left[tag_col].isin([0, 1])).any():
            leg.append(Line2D([0], [0], marker='o', color='w', label=f"{tag_col} ≠ 0/1", markerfacecolor='gray', markersize=8))
        ax0.legend(handles=leg, loc="best", frameon=True)

        # -------- Right subplot: continuous by feature with vmin/vmax --------
        vals = right[feature_col].to_numpy(dtype=float)
        # automatic limits if none are provided
        vmin_eff = np.nanmin(vals) if vmin is None else float(vmin)
        vmax_eff = np.nanmax(vals) if vmax is None else float(vmax)

        sc = ax1.scatter(
            right[lon_col], right[lat_col],
            c=vals, cmap=cmap, vmin=vmin_eff, vmax=vmax_eff,
            s=s, alpha=alpha, linewidths=0
        )
        ax1.set_xlabel("Longitude"); ax1.set_ylabel("Latitude")
        ax1.set_title(f"Color by feature: {feature_col}")

        # -------- Thin colorbar attached to the second map --------
        divider = make_axes_locatable(ax1)
        # 'size' can be a percentage of the map axis (e.g. "2%")
        cax = divider.append_axes("right", size=f"{cb_width*100:.1f}%", pad=cb_pad)
        cbar = fig.colorbar(sc, cax=cax)
        cbar.set_label(feature_col)
        if cb_ticks is not None and cb_ticks > 0:
            cbar.locator = plt.MaxNLocator(cb_ticks)
            cbar.update_ticks()
        # Optional: font size of the colorbar ticks
        cbar.ax.tick_params(labelsize=8)

        # -------- Common adjustments --------
        if equal_aspect:
            ax0.set_aspect('equal', adjustable='box')
            ax1.set_aspect('equal', adjustable='box')

        # Same geographic bounding box in both panels for direct comparison
        xmin = np.nanmin(df[lon_col].to_numpy())
        xmax = np.nanmax(df[lon_col].to_numpy())
        ymin = np.nanmin(df[lat_col].to_numpy())
        ymax = np.nanmax(df[lat_col].to_numpy())
        for ax in (ax0, ax1):
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(ymin, ymax)

        if save:
            if filename is not None:
                plt.savefig(filename)
            else:
                plt.savefig(f"geo_compare_{feature_col}.png")
        if show:
            plt.show()
        plt.close(fig)  # close the figure to free memory

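A sketch of plot_geo_compare with explicit color limits and a thinned colorbar; the vmin/vmax values and the output file name are illustrative and would normally be chosen from the expected range of the selected feature:

fr.plot_geo_compare("dst_pt_ma",
                    vmin=0, vmax=15,          # illustrative color range
                    cb_width=0.03, cb_ticks=5,
                    s=12, alpha=0.9,
                    save=True, filename="geo_dst_pt_ma.png",
                    show=False)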
if __name__ == "__main__":
    import json
    from sarapy.utils import dataMerging
    import numpy as np
    import matplotlib.pyplot as plt
    from sarapy.utils.plotting import plotTemporalData
    plt.style.use('bmh')

    pkg_logger = logging.getLogger("sarapy.stats")
    pkg_logger.setLevel(logging.ERROR)

    ## PlantinFMCreator arguments
    kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
                        "dist_mismo_lugar":0.2, "max_dist":100,
                        "umbral_ratio_dCdP":2, "deltaO_medio":4,
                        "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
                        "deltaO_ma": True, "deltaO_ma_window": 26}


    ## arguments for the PlantinClassifier.classify() method
    kwargs_classifier = {"proba_threshold":0.45,
                         "use_proba_ma":False,
                         "proba_ma_window":10,
                         "update_samePlace":True,
                         "update_dstpt":True,
                         "umbral_proba_dstpt":0.5,
                         "umbral_bajo_dstpt":1.5,
                         "use_ma":True,
                         "dstpt_ma_window":62,
                         "use_min_dstpt":False,
                         "factor":0.1,

                         "useRatioStats":False,
                         "std_weight":1.,
                         "useDistancesStats":False,
                         "ratio_dcdp_umbral":0.1,
                         "dist_umbral":0.5,
                         }


    time_filter = None

    nodo = "UPM039N"
    fecha = "2025-09-04"
    save = True
    show = False

    hdpath = f"examples\\{fecha}\\{nodo}\\historical-data.json"       # historical file
    pppath = f"examples\\{fecha}\\{nodo}\\post-processing-data.json"  # post-processing file
    raw_data = f"examples\\{fecha}\\{nodo}\\data.json"                # raw file

    with open(hdpath, 'r') as file:
        historical_data = json.load(file)
    with open(pppath, 'r') as file:
        post_data = json.load(file)
    with open(raw_data, 'r') as file:
        raw_data = json.load(file)

    merged_data = dataMerging(historical_data, post_data, raw_data, nodoName=nodo, newColumns=False, asDF=False)

    outliers = {
        "ratio_dCdP": (-5, 2),
        "deltaO": (0, 3600),
        "time_ac": (0, 100),
        "precision": (0, 5000),
        "distances": (0, 100)
    }

    fr = FeaturesResume(merged_data, info=nodo, filtrar=None, outliers=outliers,
                        kwargs_classifier=kwargs_classifier,
                        kwargs_fmcreator=kwargs_fmcreator,
                        updateTagSeedling=True, timeFilter=None,
                        window_size_ma=62)

    print(fr.data["tag_seedling"].value_counts(normalize=True))
    print(fr.getResume(to="all"))

    time_filter = {"start_time": "13:29:13",
                   "end_time": "13:43:19",
                   "tz_target": "America/Montevideo",
                   "timestamp_key": "timestamp",
                   "inclusive": "both",  # 'both' | 'neither' | 'left' | 'right'
                   "inplace": False
                   }

    new_fr = fr.filter_raw_by_time_window(**time_filter)
    print(new_fr.getResume(to="all"))
    new_fr.plotFeatureComparison("dst_pt_ma", "tag_seed_probas1", figsize=(12, 8),
                                 show=True, line2=True, marker2=None)
    new_fr.plotFeatureComparison("dst_pt_ma", "tag_seedling", figsize=(12, 8),
                                 show=True, line2=True, marker2=None)