sarapy 2.1.1__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sarapy-2.1.1/sarapy.egg-info → sarapy-2.3.0}/PKG-INFO +10 -1
- {sarapy-2.1.1 → sarapy-2.3.0}/README.md +9 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/pyproject.toml +1 -1
- sarapy-2.3.0/sarapy/analysis/FeaturesResume.py +618 -0
- sarapy-2.3.0/sarapy/analysis/__init__.py +3 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/dataProcessing/OpsProcessor.py +49 -25
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/mlProcessors/PlantinClassifier.py +79 -32
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/mlProcessors/PlantinFMCreator.py +8 -11
- sarapy-2.3.0/sarapy/mlProcessors/__init__.py +11 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/preprocessing/TransformInputData.py +2 -2
- sarapy-2.3.0/sarapy/preprocessing/__init__.py +11 -0
- sarapy-2.3.0/sarapy/stats/__init__.py +13 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/stats/stats.py +5 -6
- sarapy-2.3.0/sarapy/utils/__init__.py +3 -0
- sarapy-2.3.0/sarapy/utils/utils.py +172 -0
- sarapy-2.3.0/sarapy/version.py +2 -0
- {sarapy-2.1.1 → sarapy-2.3.0/sarapy.egg-info}/PKG-INFO +10 -1
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy.egg-info/SOURCES.txt +4 -4
- sarapy-2.1.1/sarapy/mlProcessors/__init__.py +0 -0
- sarapy-2.1.1/sarapy/preprocessing/__init__.py +0 -2
- sarapy-2.1.1/sarapy/stats/__init__.py +0 -1
- sarapy-2.1.1/sarapy/utils/__init__.py +0 -0
- sarapy-2.1.1/sarapy/utils/amg_decoder.py +0 -125
- sarapy-2.1.1/sarapy/utils/amg_ppk.py +0 -38
- sarapy-2.1.1/sarapy/utils/getRawOperations.py +0 -20
- sarapy-2.1.1/sarapy/version.py +0 -2
- {sarapy-2.1.1 → sarapy-2.3.0}/LICENCE +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/__init__.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/dataProcessing/GeoProcessor.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/dataProcessing/TLMSensorDataProcessor.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/dataProcessing/TimeSeriesProcessor.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/dataProcessing/__init__.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/mlProcessors/FertilizerFMCreator.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/mlProcessors/FertilizerTransformer.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/preprocessing/DistancesImputer.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/preprocessing/FertilizerImputer.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/preprocessing/TransformToOutputData.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy/utils/plotting.py +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy.egg-info/dependency_links.txt +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy.egg-info/requires.txt +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/sarapy.egg-info/top_level.txt +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/setup.cfg +0 -0
- {sarapy-2.1.1 → sarapy-2.3.0}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sarapy
|
|
3
|
-
Version: 2.1.1
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Home-page: https://github.com/lucasbaldezzari/sarapy
|
|
5
5
|
Author: Lucas Baldezzari
|
|
6
6
|
Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
|
|
@@ -19,6 +19,15 @@ Requires-Dist: geopy
|
|
|
19
19
|
|
|
20
20
|
Library for processing SARAPICO project metadata of _AMG SA_.
|
|
21
21
|
|
|
22
|
+
#### Version 2.3.0
|
|
23
|
+
|
|
24
|
+
- Se agregan funcionalidades.
|
|
25
|
+
- Se corrigen errores menores.
|
|
26
|
+
|
|
27
|
+
#### Version 2.2.0
|
|
28
|
+
|
|
29
|
+
- Se agrega baseDeltaP en PlantinFMCreator para poder dividir el timestamp de la electrónica por 10, ya que se envía en décimas de segundo.
|
|
30
|
+
|
|
22
31
|
#### Version 2.1.1
|
|
23
32
|
|
|
24
33
|
- Se corrige error de tiepo en TransformToOutputData.
|
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
Library for processing SARAPICO project metadata of _AMG SA_.
|
|
4
4
|
|
|
5
|
+
#### Version 2.3.0
|
|
6
|
+
|
|
7
|
+
- Se agregan funcionalidades.
|
|
8
|
+
- Se corrigen errores menores.
|
|
9
|
+
|
|
10
|
+
#### Version 2.2.0
|
|
11
|
+
|
|
12
|
+
- Se agrega baseDeltaP en PlantinFMCreator para poder dividir el timestamp de la electrónica por 10, ya que se envía en décimas de segundo.
|
|
13
|
+
|
|
5
14
|
#### Version 2.1.1
|
|
6
15
|
|
|
7
16
|
- Se corrige error de tiepo en TransformToOutputData.
|
|
@@ -0,0 +1,618 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
logger = logging.getLogger(__name__) # ← "sarapy.stats"
|
|
3
|
+
logging.getLogger("matplotlib.font_manager").setLevel(logging.WARNING)
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
|
8
|
+
import matplotlib.gridspec as gridspec
|
|
9
|
+
from matplotlib.ticker import ScalarFormatter
|
|
10
|
+
import seaborn as sns
|
|
11
|
+
from sarapy.mlProcessors import PlantinClassifier
|
|
12
|
+
from sarapy.preprocessing import TransformInputData
|
|
13
|
+
from sarapy.mlProcessors import PlantinFMCreator
|
|
14
|
+
from sarapy.stats import *
|
|
15
|
+
from sarapy.dataProcessing import OpsProcessor
|
|
16
|
+
import re
|
|
17
|
+
from datetime import datetime, time
|
|
18
|
+
|
|
19
|
+
class FeaturesResume():
|
|
20
|
+
def __init__(self, raw_data, info="", filtrar=None, updateTagSeedling=False,
             kwargs_fmcreator=None, kwargs_classifier=None, timeFilter=None):
    """
    Initialise a FeaturesResume: optionally filter the raw records by time of day,
    transform them into a feature DataFrame (``self.data``) and add the moving
    average of ``dst_pt`` when that column is present.

    Args:
        - raw_data: raw operation records. They are handed to
          ``TransformInputData.transform`` and to ``pd.DataFrame``.
        - info (str): node(s), date or any other information of interest; it is
          used in plot titles.
        - filtrar: 1 keeps only rows with tag_seedling == 1, 0 keeps only rows with
          tag_seedling == 0; any other value keeps every row.
        - updateTagSeedling (bool): if truthy, ``tag_seedling`` is taken from the
          classifier output instead of from the raw records.
        - kwargs_fmcreator (dict | None): configuration passed to PlantinFMCreator.
          A falsy value selects the defaults below.
        - kwargs_classifier (dict | None): keyword arguments forwarded to the
          classifier through ``classifiedData``. A falsy value selects the defaults below.
        - timeFilter (dict | None): keyword arguments for
          ``filter_raw_by_time_window`` (start_time, end_time, ...). A falsy value
          disables the time filter.
    """
    self.raw_data = raw_data
    self.updateTagSeedling = updateTagSeedling
    self.filtrar = filtrar
    self.timeFilter = timeFilter

    self.info = info
    if not kwargs_fmcreator:
        self.kwargs_fmcreator = {"imputeDistances":False, "distanciaMedia":1.8, "umbral_precision":0.3,
                                 "dist_mismo_lugar":0.0, "max_dist":100,
                                 "umbral_ratio_dCdP":2, "deltaO_medio":4,}
    else:
        self.kwargs_fmcreator = kwargs_fmcreator

    if not kwargs_classifier:
        self.kwargs_classifier = {"proba_threshold":0.85,
                                  "update_samePlace":False,
                                  "update_dstpt":False,
                                  "useRatioStats":False,
                                  "std_weight":1.,
                                  "useDistancesStats":False,
                                  "ratio_dcdp_umbral":0.3,
                                  "dist_umbral":0.5,
                                  "umbral_bajo_dstpt":4,
                                  "umbral_proba_dstpt":0.85}
    else:
        self.kwargs_classifier = kwargs_classifier

    if timeFilter:
        # filter_raw_by_time_window only returns the filtered *records* when
        # inplace=True; with inplace=False (its default) it returns a brand new
        # FeaturesResume, which must never be stored as raw_data. Force the flag
        # regardless of what the caller put in timeFilter.
        self.raw_data = self.filter_raw_by_time_window(**{**timeFilter, "inplace": True})

    self.plantinFMCreator = PlantinFMCreator(self.kwargs_fmcreator)
    self.tid = TransformInputData()
    self.data = self.transformRawData(self.raw_data)
    # .copy() so that the column added below is written to an independent frame
    # and not to a slice of the unfiltered one.
    if filtrar == 1:
        self.data = self.data[self.data["tag_seedling"] == 1].copy()
    elif filtrar == 0:
        self.data = self.data[self.data["tag_seedling"] == 0].copy()

    if "dst_pt" in self.data.columns:
        self.data["dst_pt_ma"] = self.getSensorMA()
|
|
68
|
+
|
|
69
|
+
def transformRawData(self, raw_data) -> pd.DataFrame:
    """
    Pre-process the raw records and return a DataFrame with the features used by this class.

    Args:
        - raw_data: raw operation records. They are passed to ``self.tid.transform``
          and to ``pd.DataFrame``; the columns "nodo", "tag_seedling",
          "raw_tag_seedling", "tag_fertilizer" and "raw_tag_fertilizer" are read from them.

    Returns:
        DataFrame with the following columns:
        - nodo
        - tag_seedling: classifier output when ``self.updateTagSeedling`` is truthy,
          otherwise the value found in raw_data
        - tag_seed_probas1, tag_seed_probas0: columns 1 and 0 of the probabilities
          returned by ``classifiedData``
        - raw_tag_seedling
        - tag_fertilizer
        - raw_tag_fertilizer
        - deltaO, ratio_dCdP, distances: columns 0, 1 and 2 of the first value
          returned by ``self.plantinFMCreator.fit_transform``
        - time_ac: "TIME_AC" of the transformed samples
        - precision: of the GPS
        - dst_pt
        - inest_pt
        - latitud
        - longitud
    """

    samples = self.tid.transform(raw_data)  # transform the raw records
    temp_rawdatadf = pd.DataFrame(raw_data)
    temp_samplesdf = pd.DataFrame(samples)
    temporal_features, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(samples)
    columns = [ 'nodo',
                'tag_seedling',
                'tag_seed_probas1',
                'tag_seed_probas0',
                'raw_tag_seedling',
                'tag_fertilizer',
                'raw_tag_fertilizer',
                'deltaO',
                'ratio_dCdP',
                'time_ac',
                'distances',
                'precision',
                'dst_pt',
                'inest_pt',
                'latitud',
                'longitud',
                ]

    # build the output DataFrame
    data = pd.DataFrame(columns=columns)
    data["nodo"] = temp_rawdatadf["nodo"]
    # NOTE(review): classifiedData transforms self.raw_data again instead of reusing
    # `samples`; the two only agree when raw_data is self.raw_data (as in __init__).
    tags_seed_updated, probas = self.classifiedData(**self.kwargs_classifier)
    if self.updateTagSeedling:
        data["tag_seedling"] = tags_seed_updated
    else:
        data["tag_seedling"] = temp_rawdatadf["tag_seedling"]
    data["tag_seed_probas1"] = probas[:,1]
    data["tag_seed_probas0"] = probas[:,0]
    data["raw_tag_seedling"] = temp_rawdatadf["raw_tag_seedling"]
    data["tag_fertilizer"] = temp_rawdatadf["tag_fertilizer"]
    data["raw_tag_fertilizer"] = temp_rawdatadf["raw_tag_fertilizer"]
    data["deltaO"] = temporal_features[:,0]
    data["ratio_dCdP"] = temporal_features[:,1]
    data["time_ac"] = temp_samplesdf["TIME_AC"]
    data["distances"] = temporal_features[:,2]
    data["precision"] = temp_samplesdf["precision"]
    data["dst_pt"] = dst_pt
    data["inest_pt"] = inest_pt
    data["latitud"] = temp_samplesdf["latitud"]
    data["longitud"] = temp_samplesdf["longitud"]

    return data
|
|
140
|
+
|
|
141
|
+
def classifiedData(self, classifier_file = 'modelos\\pipeline_rf.pkl', **kwargs_classifier):
    """
    Classify the operations stored in ``self.raw_data``.

    Args:
        - classifier_file: path handed to PlantinClassifier.
          NOTE(review): the default uses Windows path separators — confirm it
          resolves on the platforms where this is run.
        - **kwargs_classifier: forwarded unchanged to ``PlantinClassifier.classify``.

    Returns:
        Tuple ``(classifications, probabilities)`` as produced by ``classify``.
    """
    transformed = self.tid.transform(self.raw_data)
    feature_matrix, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(transformed)

    classifier = PlantinClassifier(classifier_file=classifier_file)
    labels, probabilities = classifier.classify(feature_matrix, dst_pt, inest_pt,
                                                **kwargs_classifier)
    return labels, probabilities
|
|
151
|
+
|
|
152
|
+
def removeOutliers(self, limits:dict={"deltaO": (0, 3600),
|
|
153
|
+
"precision": (0, 10000)}):
|
|
154
|
+
"""
|
|
155
|
+
Función para eliminar outliers de las características procesadas.
|
|
156
|
+
"""
|
|
157
|
+
|
|
158
|
+
##chqueo que columnas sí están dentro de self.data y limits.
|
|
159
|
+
##las que no están, se ignoran y se muestra un mensaje de warning
|
|
160
|
+
##actualizo las columnas dentro de limits eliminando las que no están en self.data
|
|
161
|
+
for col in list(limits.keys()):
|
|
162
|
+
if col not in self.data.columns:
|
|
163
|
+
logger.warning(f"La columna {col} no está en los datos y será ignorada.")
|
|
164
|
+
del limits[col]
|
|
165
|
+
|
|
166
|
+
##elimino outliers
|
|
167
|
+
for col, (lower, upper) in limits.items():
|
|
168
|
+
self.data = self.data[(self.data[col] >= lower) & (self.data[col] <= upper)]
|
|
169
|
+
|
|
170
|
+
def getResume(self, to="all", pctbajo_value=1, pctalto_value=14, lista_funciones=None):
    """
    Return a table of summary statistics for the numeric columns of ``self.data``.

    Rows of the result:
        - one row per entry of ``lista_funciones`` (by default count, mean, median,
          std, min, max, skew, kurt)
        - over_total: fraction of the rows of ``self.data`` that were summarised.
          It is placed right after "count" when "count" was requested, otherwise first.
        - pct_bajo / pct_alto (only filled for the ``dst_pt`` column, when present):
          fraction of rows with ``dst_pt < pctbajo_value`` and ``dst_pt > pctalto_value``.

    Args:
        - to: 1 summarises only rows with tag_seedling == 1, 0 only rows with
          tag_seedling == 0; any other value (e.g. "all") summarises every row.
        - pctbajo_value, pctalto_value: thresholds used for pct_bajo / pct_alto.
        - lista_funciones: aggregation name or list of names accepted by
          ``DataFrame.agg``. A falsy value selects the default list.

    Returns:
        DataFrame indexed by statistic name; cells without a value hold the
        string "not apply".
    """
    if not lista_funciones:
        lista_funciones = ["count", "mean", "median","std", "min", "max", "skew", "kurt"]
    elif isinstance(lista_funciones, str):
        # A bare string would make agg() return a Series instead of a DataFrame.
        lista_funciones = [lista_funciones]

    # The node identifier is not a feature; tolerate frames that do not carry it.
    features = self.data.drop(columns=["nodo"], errors="ignore")
    num_cols = features.select_dtypes(include="number").columns

    if to == 1:
        features = features[features["tag_seedling"] == 1]
    elif to == 0:
        features = features[features["tag_seedling"] == 0]

    stats = features[num_cols].agg(lista_funciones)

    total_rows = len(self.data)
    over_val = (len(features) / total_rows) if total_rows > 0 else np.nan
    over_row = pd.Series(over_val, index=stats.columns, name="over_total").to_frame().T

    # Insert over_total after "count" when that statistic exists; otherwise it
    # simply goes first (the original code raised KeyError in that case).
    if "count" in stats.index:
        pieces = [stats.loc[["count"]], over_row, stats.drop(index=["count"])]
    else:
        pieces = [over_row, stats]
    stats = pd.concat(pieces, axis=0)

    if "dst_pt" in features.columns:
        pct_bajo = float(np.mean(features["dst_pt"] < pctbajo_value))
        pct_alto = float(np.mean(features["dst_pt"] > pctalto_value))
        # Add/update those rows, only for the dst_pt column.
        stats.loc["pct_bajo", "dst_pt"] = pct_bajo
        stats.loc["pct_alto", "dst_pt"] = pct_alto

    # Cells without a value are reported as "not apply".
    stats = stats.fillna("not apply")

    return stats
|
|
222
|
+
|
|
223
|
+
def getSensorMA(self, window_size=104, mode='same'):
    """
    Compute the moving average of the ``dst_pt`` column of ``self.data``.

    Args:
        - window_size: number of samples in the (uniform) averaging window.
        - mode: ``np.convolve`` mode ('same', 'full' or 'valid').

    Returns:
        numpy array with the moving average. With ``mode='same'`` the result
        always has one value per row of ``self.data`` (edges are averaged against
        zero padding), even when there are fewer rows than ``window_size``. An
        empty array is returned when there is no data.
    """
    values = self.data["dst_pt"].values
    if values.size == 0:
        # np.convolve refuses empty input; an empty average is the natural answer.
        return np.array([], dtype=float)

    kernel = np.ones(window_size) / window_size

    if mode == 'same' and values.size < window_size:
        # np.convolve(mode='same') returns max(len(values), window_size) samples,
        # which cannot be stored back as a column when the series is shorter than
        # the window. Take the centred, data-length slice of the full convolution
        # instead; this is the same alignment 'same' uses for long series.
        full = np.convolve(values, kernel, mode='full')
        start = (window_size - 1) // 2
        return full[start:start + values.size]

    return np.convolve(values, kernel, mode=mode)
|
|
230
|
+
|
|
231
|
+
def generateSensorMA(self):
    """Store the result of ``getSensorMA()`` in the ``sensor_ma`` column of ``self.data``."""
    moving_average = self.getSensorMA()
    self.data["sensor_ma"] = moving_average
|
|
233
|
+
|
|
234
|
+
def to_time_obj(self,t):
    """
    Convert ``t`` into a ``datetime.time``.

    Accepts 'HH:MM[:SS]' (24h) or 'h:MM[:SS] a.m./p.m.' (with or without dots and
    spaces), a ``datetime.time`` (returned unchanged) or a ``datetime.datetime``
    (its time-of-day part is returned).

    Raises:
        ValueError: if the text cannot be interpreted as a time.
    """
    if isinstance(t, time):
        return t
    if isinstance(t, datetime):
        return t.time()

    # Normalise variants such as 'a.m.', 'a. m.', ' PM' into 'am'/'pm'.
    s = str(t).strip().lower()
    s = re.sub(r'\s+', '', s)   # drop every whitespace character
    s = s.replace('.', '')      # drop dots

    # 12h clock with am/pm
    if 'am' in s or 'pm' in s:
        for fmt in ('%I:%M:%S%p', '%I:%M%p'):
            try:
                return datetime.strptime(s.upper(), fmt).time()
            except ValueError:
                pass
        raise ValueError(f"No pude interpretar la hora 12h: {t!r}")

    # 24h clock. Parse the normalised text (not the raw argument) so that
    # surrounding whitespace is tolerated and non-string input ends in the
    # documented ValueError instead of a TypeError.
    for fmt in ('%H:%M:%S', '%H:%M'):
        try:
            return datetime.strptime(s, fmt).time()
        except ValueError:
            pass
    raise ValueError(f"No pude interpretar la hora 24h: {t!r}")
|
|
256
|
+
|
|
257
|
+
def time_to_td(self,t: time) -> pd.Timedelta:
    """Return the time of day ``t`` as a ``pd.Timedelta`` counted from midnight."""
    parts = {
        "hours": t.hour,
        "minutes": t.minute,
        "seconds": t.second,
        "microseconds": t.microsecond,
    }
    return pd.Timedelta(**parts)
|
|
259
|
+
|
|
260
|
+
def filter_raw_by_time_window(self,
                              start_time, end_time,
                              tz_target: str = "America/Montevideo",
                              timestamp_key: str = "timestamp",
                              inclusive: str = "both",  # 'both' | 'neither' | 'left' | 'right'
                              inplace = False):
    """
    Keep the records of ``self.raw_data`` whose timestamp, expressed as wall-clock
    time in ``tz_target``, falls between ``start_time`` and ``end_time``.

    Args:
        - start_time, end_time: 'HH:MM[:SS]' (24h), 'h:MM[:SS] a.m./p.m.' or
          ``datetime.time``. When start_time is later than end_time the window is
          taken to cross midnight (e.g. 23:30 to 01:15).
        - tz_target: time zone in which the time of day is evaluated.
        - timestamp_key: name of the column/key that holds the timestamp. Values
          are parsed with ``pd.to_datetime(..., utc=True)``.
        - inclusive: which ends of the window are included:
          'both', 'neither', 'left' or 'right'.
        - inplace: selects WHAT IS RETURNED (``self`` is never modified):
            * True  -> the filtered records, in the same structure as
              ``self.raw_data`` (DataFrame if it was a DataFrame, otherwise a
              list of dicts).
            * False -> a new FeaturesResume built from the filtered records with
              the same configuration as this object.

    Raises:
        KeyError: if ``timestamp_key`` is not present in the data.
        ValueError: if a timestamp cannot be parsed or ``inclusive`` is invalid.
    """
    if inclusive not in ("both", "neither", "left", "right"):
        # Series.between performs this check itself, but the cross-midnight
        # branch below does not go through it.
        raise ValueError("inclusive debe ser 'both', 'neither', 'left' o 'right'.")

    was_dataframe = isinstance(self.raw_data, pd.DataFrame)
    df = self.raw_data.copy() if was_dataframe else pd.DataFrame(self.raw_data)
    if timestamp_key not in df.columns:
        raise KeyError(f"Columna {timestamp_key!r} no encontrada en los datos.")

    # 1) Parse and convert to the target time zone.
    ts_utc = pd.to_datetime(df[timestamp_key], utc=True, errors='coerce')
    n_bad = int(ts_utc.isna().sum())
    if n_bad:
        raise ValueError(f"Hay {n_bad} timestamps inválidos/imposibles de parsear.")
    ts_local = ts_utc.dt.tz_convert(tz_target)

    # 2) Wall-clock time of day as a Timedelta. The time zone is dropped first:
    #    subtracting tz-aware midnights yields *elapsed* time, which differs from
    #    the clock reading on days with a DST transition.
    wall_clock = ts_local.dt.tz_localize(None)
    tod = wall_clock - wall_clock.dt.normalize()

    # 3) Target window -> Timedelta.
    t0 = self.time_to_td(self.to_time_obj(start_time))
    t1 = self.time_to_td(self.to_time_obj(end_time))

    # 4) Build the mask (handles windows that cross midnight).
    if t0 <= t1:
        mask = tod.between(t0, t1, inclusive=inclusive)
    else:
        # e.g. 23:30 -> 01:15: two stretches, [t0, 24h) and [0h, t1]
        mask = tod.ge(t0) | tod.le(t1)
        if inclusive in ("neither", "right"):  # left end excluded
            mask &= ~tod.eq(t0)
        if inclusive in ("neither", "left"):   # right end excluded
            mask &= ~tod.eq(t1)

    filtered = df[mask]
    records = filtered if was_dataframe else filtered.to_dict(orient='records')

    if inplace:
        return records

    # Otherwise build a new object that carries over the current configuration.
    new_fr = FeaturesResume(
        raw_data = records,
        info = self.info,
        filtrar = self.filtrar,
        updateTagSeedling = self.updateTagSeedling,
        kwargs_fmcreator = self.kwargs_fmcreator,
        kwargs_classifier = self.kwargs_classifier,
        timeFilter = None  # the filter has already been applied
    )

    return new_fr
|
|
319
|
+
|
|
320
|
+
def _get_ratiodCdPPlot(self, figsize = (10,6), show = False):
|
|
321
|
+
"""
|
|
322
|
+
Función para retornar (y graficar si se desea) un gráfico de línea de
|
|
323
|
+
ratio_dCdP y tag_seedling. El eje Y izquierdo es ratio y el derecho es el tag_seedling
|
|
324
|
+
"""
|
|
325
|
+
# Verificamos que existan las columnas necesarias
|
|
326
|
+
if "ratio_dCdP" not in self.data.columns or "tag_seedling" not in self.data.columns:
|
|
327
|
+
raise ValueError("Faltan columnas necesarias para graficar.")
|
|
328
|
+
|
|
329
|
+
fig, ax1 = plt.subplots(figsize=figsize)
|
|
330
|
+
|
|
331
|
+
# Eje izquierdo: ratio_dCdP
|
|
332
|
+
ax1.plot(self.data["ratio_dCdP"], label='ratio_dCdP', color='blue')
|
|
333
|
+
ax1.set_xlabel("Operación")
|
|
334
|
+
ax1.set_ylabel("Ratio dCdP", color='blue')
|
|
335
|
+
ax1.tick_params(axis='y', labelcolor='blue')
|
|
336
|
+
|
|
337
|
+
# Forzar eje Y en formato decimal
|
|
338
|
+
ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=False))
|
|
339
|
+
ax1.ticklabel_format(style='plain', axis='y') # Asegura formato decimal
|
|
340
|
+
|
|
341
|
+
# Eje derecho: tag_seedling
|
|
342
|
+
ax2 = ax1.twinx()
|
|
343
|
+
ax2.plot(self.data["tag_seedling"], label='tag_seedling', color='red')
|
|
344
|
+
ax2.set_ylabel("Tag Seedling", color='red')
|
|
345
|
+
ax2.tick_params(axis='y', labelcolor='red')
|
|
346
|
+
ax2.set_ylim(0, 5) # Limitar el eje Y de tag_seedling entre 0 y 5
|
|
347
|
+
|
|
348
|
+
plt.title(f"Análisis de {self.info} - Ratio dCdP y Tag Seedling")
|
|
349
|
+
fig.tight_layout()
|
|
350
|
+
|
|
351
|
+
if show:
|
|
352
|
+
plt.show()
|
|
353
|
+
|
|
354
|
+
return fig
|
|
355
|
+
|
|
356
|
+
def plotFeatureComparison(
|
|
357
|
+
self,
|
|
358
|
+
feature1: str,
|
|
359
|
+
feature2: str,
|
|
360
|
+
y1limits=None,
|
|
361
|
+
y2limits=None,
|
|
362
|
+
figsize=(10, 6),
|
|
363
|
+
title=None,
|
|
364
|
+
show=False,
|
|
365
|
+
save=False,
|
|
366
|
+
filename=None,
|
|
367
|
+
colors = ('blue', 'red'),
|
|
368
|
+
*,
|
|
369
|
+
line1: bool = True, # ¿dibujar línea en ax1?
|
|
370
|
+
line2: bool = True, # ¿dibujar línea en ax2?
|
|
371
|
+
marker1: str | None = None, # p.ej. 'o', 's', '^' para ax1
|
|
372
|
+
marker2: str | None = None, # p.ej. 'o', 's', '^' para ax2
|
|
373
|
+
markersize: float = 6
|
|
374
|
+
):
|
|
375
|
+
"""
|
|
376
|
+
Genera un gráfico de comparación entre dos características en ejes y diferentes.
|
|
377
|
+
Podés elegir si cada eje usa línea, solo marcadores, o ambos.
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
- feature1, feature2: nombres de columnas en self.data.
|
|
381
|
+
- y1limits, y2limits: tuplas (ymin, ymax) opcionales.
|
|
382
|
+
- figsize: tamaño de la figura.
|
|
383
|
+
- show: si se muestra la figura.
|
|
384
|
+
- line1, line2: True = dibuja línea; False = solo marcadores (si se especifica marker).
|
|
385
|
+
- marker1, marker2: símbolos de marcador (ej. 'o'); None = sin marcador.
|
|
386
|
+
- markersize: tamaño del marcador.
|
|
387
|
+
"""
|
|
388
|
+
|
|
389
|
+
# chequeo que las características estén en los datos
|
|
390
|
+
if feature1 not in self.data.columns or feature2 not in self.data.columns:
|
|
391
|
+
raise ValueError("Faltan columnas necesarias para graficar.")
|
|
392
|
+
|
|
393
|
+
fig, ax1 = plt.subplots(figsize=figsize)
|
|
394
|
+
|
|
395
|
+
# ---- Eje izquierdo: feature1
|
|
396
|
+
ls1 = '-' if line1 else 'None' # 'None' evita trazar línea
|
|
397
|
+
ax1.plot(
|
|
398
|
+
self.data.index,
|
|
399
|
+
self.data[feature1].values,
|
|
400
|
+
label=feature1,
|
|
401
|
+
color=colors[0],
|
|
402
|
+
linestyle=ls1,
|
|
403
|
+
marker=marker1,
|
|
404
|
+
markersize=markersize
|
|
405
|
+
)
|
|
406
|
+
ax1.set_xlabel("Operación")
|
|
407
|
+
ax1.set_ylabel(feature1, color=colors[0])
|
|
408
|
+
ax1.tick_params(axis='y', labelcolor=colors[0])
|
|
409
|
+
|
|
410
|
+
# Formato decimal y límites opcionales
|
|
411
|
+
ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=False))
|
|
412
|
+
ax1.ticklabel_format(style='plain', axis='y')
|
|
413
|
+
if y1limits is not None:
|
|
414
|
+
ax1.set_ylim(y1limits)
|
|
415
|
+
|
|
416
|
+
# ---- Eje derecho: feature2
|
|
417
|
+
ax2 = ax1.twinx()
|
|
418
|
+
ls2 = '-' if line2 else 'None'
|
|
419
|
+
ax2.plot(
|
|
420
|
+
self.data.index,
|
|
421
|
+
self.data[feature2].values,
|
|
422
|
+
label=feature2,
|
|
423
|
+
color=colors[1],
|
|
424
|
+
linestyle=ls2,
|
|
425
|
+
marker=marker2,
|
|
426
|
+
markersize=markersize
|
|
427
|
+
)
|
|
428
|
+
ax2.set_ylabel(feature2, color=colors[1])
|
|
429
|
+
ax2.tick_params(axis='y', labelcolor=colors[1])
|
|
430
|
+
if y2limits is not None:
|
|
431
|
+
ax2.set_ylim(y2limits)
|
|
432
|
+
|
|
433
|
+
# Título y layout
|
|
434
|
+
if title is not None:
|
|
435
|
+
plt.title(title)
|
|
436
|
+
else:
|
|
437
|
+
plt.title(f"Análisis de {self.info} - {feature1} y {feature2}")
|
|
438
|
+
fig.tight_layout()
|
|
439
|
+
|
|
440
|
+
# Leyenda combinada de ambos ejes
|
|
441
|
+
lines1, labels1 = ax1.get_legend_handles_labels()
|
|
442
|
+
lines2, labels2 = ax2.get_legend_handles_labels()
|
|
443
|
+
ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')
|
|
444
|
+
|
|
445
|
+
if show:
|
|
446
|
+
plt.show()
|
|
447
|
+
|
|
448
|
+
if save:
|
|
449
|
+
if filename is not None:
|
|
450
|
+
plt.savefig(filename)
|
|
451
|
+
else:
|
|
452
|
+
plt.savefig(f"feature_comparison_{feature1}_{feature2}.png")
|
|
453
|
+
plt.close(fig) # Cierra la figura para liberar memoria
|
|
454
|
+
|
|
455
|
+
##gráfico de dispersión para comparar la distribución de 0s y 1s
|
|
456
|
+
def plot_geo_compare(
|
|
457
|
+
self,
|
|
458
|
+
feature_col: str,
|
|
459
|
+
lat_col: str = "latitud",
|
|
460
|
+
lon_col: str = "longitud",
|
|
461
|
+
tag_col: str = "tag_seedling",
|
|
462
|
+
cmap: str = "winter",
|
|
463
|
+
figsize=(14, 6),
|
|
464
|
+
s: float = 10.0,
|
|
465
|
+
alpha: float = 0.8,
|
|
466
|
+
equal_aspect: bool = True,
|
|
467
|
+
# ---- NUEVO: control de colorbar y límites de color ----
|
|
468
|
+
vmin: float | None = None,
|
|
469
|
+
vmax: float | None = None,
|
|
470
|
+
cb_width: float = 0.02, # ancho relativo del colorbar (fracción del eje del mapa)
|
|
471
|
+
cb_pad: float = 0.02, # separación entre mapa y colorbar (en fracción)
|
|
472
|
+
cb_ticks: int | None = None, # número aprox. de ticks (None = automático)
|
|
473
|
+
):
|
|
474
|
+
# -------- Validación --------
|
|
475
|
+
df = self.data
|
|
476
|
+
required_cols = {lat_col, lon_col, tag_col, feature_col}
|
|
477
|
+
missing = [c for c in required_cols if c not in df.columns]
|
|
478
|
+
if missing:
|
|
479
|
+
raise ValueError(f"Faltan columnas en el DataFrame: {missing}")
|
|
480
|
+
|
|
481
|
+
# Datos y máscaras sin NaN
|
|
482
|
+
left = df[[lat_col, lon_col, tag_col]].dropna()
|
|
483
|
+
right = df[[lat_col, lon_col, feature_col]].dropna()
|
|
484
|
+
|
|
485
|
+
# -------- Figura principal (2 subplots, sin colorbar aún) --------
|
|
486
|
+
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=figsize, constrained_layout=False)
|
|
487
|
+
plt.subplots_adjust(wspace=0.25, left=0.06, right=0.94, bottom=0.10, top=0.90)
|
|
488
|
+
|
|
489
|
+
# -------- Subplot izquierdo: binario rojo/verde --------
|
|
490
|
+
color_map = {1: "green", 0: "red"}
|
|
491
|
+
colors_left = left[tag_col].map(color_map).fillna("gray").values
|
|
492
|
+
ax0.scatter(left[lon_col], left[lat_col], c=colors_left, s=s, alpha=alpha, linewidths=0)
|
|
493
|
+
ax0.set_xlabel("Longitud"); ax0.set_ylabel("Latitud")
|
|
494
|
+
ax0.set_title("Semilleros (verde=1, rojo=0)")
|
|
495
|
+
from matplotlib.lines import Line2D
|
|
496
|
+
leg = [
|
|
497
|
+
Line2D([0],[0], marker='o', color='w', label=f"{tag_col} = 1", markerfacecolor='green', markersize=8),
|
|
498
|
+
Line2D([0],[0], marker='o', color='w', label=f"{tag_col} = 0", markerfacecolor='red', markersize=8),
|
|
499
|
+
]
|
|
500
|
+
if (~left[tag_col].isin([0,1])).any():
|
|
501
|
+
leg.append(Line2D([0],[0], marker='o', color='w', label=f"{tag_col} ≠ 0/1", markerfacecolor='gray', markersize=8))
|
|
502
|
+
ax0.legend(handles=leg, loc="best", frameon=True)
|
|
503
|
+
|
|
504
|
+
# -------- Subplot derecho: continuo por feature con vmin/vmax --------
|
|
505
|
+
vals = right[feature_col].to_numpy(dtype=float)
|
|
506
|
+
# límites automáticos si no se pasan
|
|
507
|
+
vmin_eff = np.nanmin(vals) if vmin is None else float(vmin)
|
|
508
|
+
vmax_eff = np.nanmax(vals) if vmax is None else float(vmax)
|
|
509
|
+
|
|
510
|
+
sc = ax1.scatter(
|
|
511
|
+
right[lon_col], right[lat_col],
|
|
512
|
+
c=vals, cmap=cmap, vmin=vmin_eff, vmax=vmax_eff,
|
|
513
|
+
s=s, alpha=alpha, linewidths=0
|
|
514
|
+
)
|
|
515
|
+
ax1.set_xlabel("Longitud"); ax1.set_ylabel("Latitud")
|
|
516
|
+
ax1.set_title(f"Color por feature: {feature_col}")
|
|
517
|
+
|
|
518
|
+
# -------- Colorbar delgado adosado al segundo mapa --------
|
|
519
|
+
divider = make_axes_locatable(ax1)
|
|
520
|
+
# 'size' puede ser porcentaje del eje del mapa (p.ej. "2%")
|
|
521
|
+
cax = divider.append_axes("right", size=f"{cb_width*100:.1f}%", pad=cb_pad)
|
|
522
|
+
cbar = fig.colorbar(sc, cax=cax)
|
|
523
|
+
cbar.set_label(feature_col)
|
|
524
|
+
if cb_ticks is not None and cb_ticks > 0:
|
|
525
|
+
cbar.locator = plt.MaxNLocator(cb_ticks)
|
|
526
|
+
cbar.update_ticks()
|
|
527
|
+
# Opcional: tipografía/tamaño de ticks del colorbar
|
|
528
|
+
cbar.ax.tick_params(labelsize=8)
|
|
529
|
+
|
|
530
|
+
# -------- Ajustes comunes --------
|
|
531
|
+
if equal_aspect:
|
|
532
|
+
ax0.set_aspect('equal', adjustable='box')
|
|
533
|
+
ax1.set_aspect('equal', adjustable='box')
|
|
534
|
+
|
|
535
|
+
# Misma caja geográfica en ambos paneles para comparación directa
|
|
536
|
+
xmin = np.nanmin(df[lon_col].to_numpy())
|
|
537
|
+
xmax = np.nanmax(df[lon_col].to_numpy())
|
|
538
|
+
ymin = np.nanmin(df[lat_col].to_numpy())
|
|
539
|
+
ymax = np.nanmax(df[lat_col].to_numpy())
|
|
540
|
+
for ax in (ax0, ax1):
|
|
541
|
+
ax.set_xlim(xmin, xmax)
|
|
542
|
+
ax.set_ylim(ymin, ymax)
|
|
543
|
+
|
|
544
|
+
plt.show()
|
|
545
|
+
|
|
546
|
+
if __name__ == "__main__":
    # Manual smoke run: loads one node/day of example data, builds the summary,
    # then repeats it for a time-of-day window and plots two comparisons.
    import json
    from pathlib import Path
    from sarapy.utils import dataMerging
    import numpy as np
    import matplotlib.pyplot as plt
    from sarapy.utils.plotting import plotTemporalData
    plt.style.use('bmh')

    pkg_logger = logging.getLogger("sarapy.stats")
    pkg_logger.setLevel(logging.ERROR)

    kwargs_fmcreator = {"imputeDistances":False, "distanciaMedia":1.8, "umbral_precision":0.3,
                        "dist_mismo_lugar":0.0, "max_dist":100,
                        "umbral_ratio_dCdP":2, "deltaO_medio":4,}

    kwargs_classifier = {"proba_threshold":0.85,
                         "update_samePlace":False,
                         "update_dstpt":False,
                         "useRatioStats":False,
                         "std_weight":1.,
                         "useDistancesStats":False,
                         "ratio_dcdp_umbral":0.3,
                         "dist_umbral":0.5,
                         "umbral_bajo_dstpt":4,
                         "umbral_proba_dstpt":0.7}

    time_filter=None

    nodo = "UPM039N"
    fecha = "2025-09-04"
    save = True
    show = False

    # Build the paths with pathlib so the example also runs where "\" is not a
    # path separator.
    base_dir = Path("examples") / fecha / nodo
    hdpath = base_dir / "historical-data.json"        # historical file
    pppath = base_dir / "post-processing-data.json"   # post-processing file
    rawpath = base_dir / "data.json"                  # raw file

    with open(hdpath, 'r') as file:
        historical_data = json.load(file)
    with open(pppath, 'r') as file:
        post_data = json.load(file)
    with open(rawpath, 'r') as file:
        raw_data = json.load(file)

    merged_data = dataMerging(historical_data, post_data, raw_data, nodoName=nodo,newColumns=False, asDF=False)

    fr = FeaturesResume(merged_data, info = nodo, filtrar=None,
                        kwargs_fmcreator=kwargs_fmcreator,
                        kwargs_classifier=kwargs_classifier, updateTagSeedling=True, timeFilter=time_filter)

    fr.removeOutliers({"deltaO": (0, 3600),
                       "time_ac": (0, 100),
                       "ratio_dCdP": (-50, 2),
                       "precision": (0, 10000),
                       "distances": (0, 5000)})

    print(fr.data["tag_seedling"].value_counts(normalize=True))
    print(fr.getResume(to="all"))

    time_filter = {"start_time": "13:29:13",
                   "end_time": "13:43:19",
                   "tz_target": "America/Montevideo",
                   "timestamp_key": "timestamp",
                   "inclusive": "both",  # 'both' | 'neither' | 'left' | 'right'
                   "inplace": False      # False -> a new FeaturesResume is returned
                   }

    new_fr = fr.filter_raw_by_time_window(**time_filter)
    print(new_fr.getResume(to="all"))
    new_fr.plotFeatureComparison("dst_pt_ma", "tag_seed_probas1", figsize=(12, 8),
                                 show=True, line2=True, marker2=None)
    new_fr.plotFeatureComparison("dst_pt_ma", "tag_seedling", figsize=(12, 8),
                                 show=True, line2=True, marker2=None)
|