sarapy 2.3.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sarapy-2.3.0/sarapy.egg-info → sarapy-3.0.0}/PKG-INFO +34 -1
- {sarapy-2.3.0 → sarapy-3.0.0}/README.md +33 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/pyproject.toml +1 -1
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/analysis/FeaturesResume.py +151 -47
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/dataProcessing/OpsProcessor.py +44 -30
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/dataProcessing/TLMSensorDataProcessor.py +5 -2
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/FertilizerTransformer.py +7 -5
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/PlantinClassifier.py +65 -23
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/PlantinFMCreator.py +25 -12
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/preprocessing/TransformInputData.py +3 -2
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/version.py +1 -1
- {sarapy-2.3.0 → sarapy-3.0.0/sarapy.egg-info}/PKG-INFO +34 -1
- {sarapy-2.3.0 → sarapy-3.0.0}/LICENCE +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/analysis/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/dataProcessing/GeoProcessor.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/dataProcessing/TimeSeriesProcessor.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/dataProcessing/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/FertilizerFMCreator.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/preprocessing/DistancesImputer.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/preprocessing/FertilizerImputer.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/preprocessing/TransformToOutputData.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/preprocessing/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/stats/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/stats/stats.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/utils/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/utils/plotting.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy/utils/utils.py +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy.egg-info/SOURCES.txt +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy.egg-info/dependency_links.txt +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy.egg-info/requires.txt +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/sarapy.egg-info/top_level.txt +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/setup.cfg +0 -0
- {sarapy-2.3.0 → sarapy-3.0.0}/setup.py +0 -0
{sarapy-2.3.0/sarapy.egg-info → sarapy-3.0.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sarapy
-Version: 2.3.0
+Version: 3.0.0
 Home-page: https://github.com/lucasbaldezzari/sarapy
 Author: Lucas Baldezzari
 Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
@@ -19,6 +19,39 @@ Requires-Dist: geopy

 Library for processing SARAPICO project metadata of _AMG SA_.

+#### Version 3.0.0
+- Se mejora la forma de obtener valores de media movil para todas las variables en las que se usa.
+- Se corrigen bugs debido a nodos con pocas operaciones.
+- Se corrigen errores a la hora de pasar parámetros a los métodos de algunas clases.
+- Se configuran parámetros de fmcreator y plantin_classifier para el reetiquetado, los mismos son:
+
+        kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
+                            "dist_mismo_lugar":0.2, "max_dist":100,
+                            "umbral_ratio_dCdP":2, "deltaO_medio":4,
+                            "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
+                            "deltaO_ma": True, "deltaO_ma_window": 26}
+
+
+        ##argumentos del método PlantinClassifier.clasiffy()
+        kwargs_classifier = {"proba_threshold":0.4,
+                             "use_proba_ma":False,
+                             "proba_ma_window":10,
+                             "update_samePlace":True,
+                             "update_dstpt":True,
+                             "umbral_proba_dstpt":0.5,
+                             "umbral_bajo_dstpt":1.5,
+                             "use_ma":True,
+                             "dstpt_ma_window":62,
+                             "use_min_dstpt":False,
+                             "factor":0.1,
+
+                             "useRatioStats":False,
+                             "std_weight":1.,
+                             "useDistancesStats":False,
+                             "ratio_dcdp_umbral":0.1,
+                             "dist_umbral":0.5,
+                             }
+
 #### Version 2.3.0

 - Se agregan funcionalidades.
{sarapy-2.3.0 → sarapy-3.0.0}/README.md

@@ -2,6 +2,39 @@

 Library for processing SARAPICO project metadata of _AMG SA_.

+#### Version 3.0.0
+- Se mejora la forma de obtener valores de media movil para todas las variables en las que se usa.
+- Se corrigen bugs debido a nodos con pocas operaciones.
+- Se corrigen errores a la hora de pasar parámetros a los métodos de algunas clases.
+- Se configuran parámetros de fmcreator y plantin_classifier para el reetiquetado, los mismos son:
+
+        kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
+                            "dist_mismo_lugar":0.2, "max_dist":100,
+                            "umbral_ratio_dCdP":2, "deltaO_medio":4,
+                            "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
+                            "deltaO_ma": True, "deltaO_ma_window": 26}
+
+
+        ##argumentos del método PlantinClassifier.clasiffy()
+        kwargs_classifier = {"proba_threshold":0.4,
+                             "use_proba_ma":False,
+                             "proba_ma_window":10,
+                             "update_samePlace":True,
+                             "update_dstpt":True,
+                             "umbral_proba_dstpt":0.5,
+                             "umbral_bajo_dstpt":1.5,
+                             "use_ma":True,
+                             "dstpt_ma_window":62,
+                             "use_min_dstpt":False,
+                             "factor":0.1,
+
+                             "useRatioStats":False,
+                             "std_weight":1.,
+                             "useDistancesStats":False,
+                             "ratio_dcdp_umbral":0.1,
+                             "dist_umbral":0.5,
+                             }
+
 #### Version 2.3.0

 - Se agregan funcionalidades.
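The changelog above lists the full default dictionaries. As a quick illustrative sketch (plain Python, not part of the package; the keys mirror the changelog, the override value is made up), a caller would typically start from those defaults and override only what a given run needs before unpacking the result into the library calls:

```python
# Sketch: start from the documented defaults and override selected keys.
default_classifier_kwargs = {
    "proba_threshold": 0.4, "use_proba_ma": False, "proba_ma_window": 10,
    "update_samePlace": True, "update_dstpt": True,
    "umbral_proba_dstpt": 0.5, "umbral_bajo_dstpt": 1.5,
    "use_ma": True, "dstpt_ma_window": 62,
    "use_min_dstpt": False, "factor": 0.1,
    "useRatioStats": False, "std_weight": 1.0, "useDistancesStats": False,
    "ratio_dcdp_umbral": 0.1, "dist_umbral": 0.5,
}

# Run-specific tweak: only the threshold changes, everything else keeps its default.
kwargs_classifier = {**default_classifier_kwargs, "proba_threshold": 0.45}
print(kwargs_classifier["proba_threshold"])  # 0.45
```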
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/analysis/FeaturesResume.py

@@ -17,8 +17,8 @@ import re
 from datetime import datetime, time

 class FeaturesResume():
-    def __init__(self, raw_data, info="", filtrar=None, updateTagSeedling=False,
-                 kwargs_fmcreator=None, kwargs_classifier=None, timeFilter=None):
+    def __init__(self, raw_data, info="", filtrar=None, updateTagSeedling=False, outliers=None,
+                 kwargs_fmcreator=None, kwargs_classifier=None, timeFilter=None, window_size_ma=104):
         """
         Constructor para inicializar la clase FeaturesResume.

@@ -29,42 +29,62 @@ class FeaturesResume():
         self.updateTagSeedling = updateTagSeedling
         self.filtrar = filtrar
         self.timeFilter = timeFilter
+        self.outliers = outliers
+        self.window_size_ma = window_size_ma

         self.info = info
         if not kwargs_fmcreator:
-            self.kwargs_fmcreator = {"imputeDistances":
-                                     "dist_mismo_lugar":0.
-                                     "umbral_ratio_dCdP":2, "deltaO_medio":4,
+            self.kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
+                                     "dist_mismo_lugar":0.2, "max_dist":100,
+                                     "umbral_ratio_dCdP":2, "deltaO_medio":4,
+                                     "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.8,
+                                     "deltaO_ma": True, "deltaO_ma_window": 26}
         else:
             self.kwargs_fmcreator = kwargs_fmcreator

         if not kwargs_classifier:
-            self.kwargs_classifier = {"proba_threshold":0.
-                                      "
-                                      "
+            self.kwargs_classifier = {"proba_threshold":0.2,
+                                      "use_proba_ma":False,
+                                      "proba_ma_window":10,
+                                      "update_samePlace":True,
+                                      "update_dstpt":True,
                                       "useRatioStats":False,
                                       "std_weight":1.,
                                       "useDistancesStats":False,
-                                      "ratio_dcdp_umbral":0.
+                                      "ratio_dcdp_umbral":0.0,
                                       "dist_umbral":0.5,
                                       "umbral_bajo_dstpt":4,
-                                      "umbral_proba_dstpt":0.
+                                      "umbral_proba_dstpt":0.70,
+                                      "use_ma":True,
+                                      "dstpt_ma_window":104,
+                                      "use_min_dstpt":False,
+                                      "factor":0.1}
         else:
             self.kwargs_classifier = kwargs_classifier

         if timeFilter:
             self.raw_data = self.filter_raw_by_time_window(**timeFilter)

-        self.plantinFMCreator = PlantinFMCreator(self.kwargs_fmcreator)
+        self.plantinFMCreator = PlantinFMCreator(**self.kwargs_fmcreator)
         self.tid = TransformInputData()
         self.data = self.transformRawData(self.raw_data)
-
+
+        if self.filtrar == 1:
             self.data = self.data[self.data["tag_seedling"] == 1]
-        elif filtrar == 0:
+        elif self.filtrar == 0:
             self.data = self.data[self.data["tag_seedling"] == 0]

         if "dst_pt" in self.data.columns:
-            self.data["
+            if len(self.data["dst_pt"]) < window_size_ma:
+                self.data["dst_pt_ma"] = self.getSensorMA(window_size=len(self.data["dst_pt"]))
+            else:
+                self.data["dst_pt_ma"] = self.getSensorMA(window_size=window_size_ma)
+
+        if "tag_seed_probas1" in self.data.columns:
+            if len(self.data["tag_seed_probas1"]) < window_size_ma:
+                self.data["tag_seed_probas1_ma"] = self.getProbasMA(window_size=len(self.data["tag_seed_probas1"]))
+            else:
+                self.data["tag_seed_probas1_ma"] = self.getProbasMA(window_size=window_size_ma)

     def transformRawData(self, raw_data):
         """
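One of the parameter-passing fixes visible in this hunk is `PlantinFMCreator(self.kwargs_fmcreator)` becoming `PlantinFMCreator(**self.kwargs_fmcreator)`. A minimal, generic sketch of why the `**` matters, using a hypothetical function in place of the real constructor:

```python
# Hypothetical function standing in for a constructor with keyword parameters.
def build(imputeDistances=True, distanciaMedia=1.8, **extra):
    return imputeDistances, distanciaMedia, extra

cfg = {"imputeDistances": False, "distanciaMedia": 2.0, "max_dist": 100}

print(build(**cfg))  # (False, 2.0, {'max_dist': 100}) -> each key reaches its parameter
print(build(cfg))    # (cfg, 1.8, {})  -> the whole dict lands in the first positional slot
```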
@@ -136,6 +156,9 @@ class FeaturesResume():
         data["latitud"] = temp_samplesdf["latitud"]
         data["longitud"] = temp_samplesdf["longitud"]

+        if self.outliers:
+            data = self.removeOutliers(data.copy(), self.outliers)
+
         return data

     def classifiedData(self, classifier_file = 'modelos\\pipeline_rf.pkl', **kwargs_classifier):
@@ -143,13 +166,18 @@ class FeaturesResume():
         raw_X = self.tid.transform(self.raw_data)
         X, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(raw_X)

-
+        # ratio_dcdp_median = np.median(X[:, 1])
+        ##reemplazo los datos de X[:, 1] por la mediana si están por debajo de -10
+        # X[:, 1] = np.where(X[:, 1] < -0.8, ratio_dcdp_median, X[:, 1])
+        # X[:, 0] = self.getMA(X[:, 0], window_size=26)
+
+        clasificador = PlantinClassifier(classifier_file=classifier_file)

-        clasificaciones, probas =
+        clasificaciones, probas = clasificador.classify(X, dst_pt, inest_pt, **kwargs_classifier)

         return clasificaciones, probas

-    def removeOutliers(self, limits:dict={"deltaO": (0, 3600),
+    def removeOutliers(self, data, limits:dict={"deltaO": (0, 3600),
                                           "precision": (0, 10000)}):
         """
         Función para eliminar outliers de las características procesadas.
@@ -158,14 +186,17 @@ class FeaturesResume():
         ##chqueo que columnas sí están dentro de self.data y limits.
         ##las que no están, se ignoran y se muestra un mensaje de warning
         ##actualizo las columnas dentro de limits eliminando las que no están en self.data
+
         for col in list(limits.keys()):
-            if col not in
+            if col not in data.columns:
                 logger.warning(f"La columna {col} no está en los datos y será ignorada.")
                 del limits[col]

         ##elimino outliers
         for col, (lower, upper) in limits.items():
-
+            data = data[(data[col] >= lower) & (data[col] <= upper)]
+
+        return data

     def getResume(self, to="all", pctbajo_value=1, pctalto_value=14, lista_funciones=None):
         """
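`removeOutliers` now receives the DataFrame explicitly, drops limit entries whose column is missing, and keeps only rows inside each declared `(lower, upper)` interval. A self-contained pandas sketch of that filtering logic, with made-up column values and limits:

```python
import pandas as pd

data = pd.DataFrame({"deltaO": [5, 50, 5000], "precision": [3.0, 12000.0, 8.0]})
limits = {"deltaO": (0, 3600), "precision": (0, 10000), "missing_col": (0, 1)}

# Ignore limit entries whose column is not present in the data.
limits = {col: bounds for col, bounds in limits.items() if col in data.columns}

# Keep only rows inside every declared interval.
for col, (lower, upper) in limits.items():
    data = data[(data[col] >= lower) & (data[col] <= upper)]

print(data)  # only the first row survives both filters
```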
@@ -226,10 +257,44 @@ class FeaturesResume():
         data: numpy array con los datos de la serie temporal
         window_size: tamaño de la ventana para calcular la media móvil
         """
-        return np.convolve(self.data["dst_pt"].values, np.ones(window_size)/window_size, mode=mode)
+        # return np.convolve(self.data["dst_pt"].values, np.ones(window_size)/window_size, mode=mode)
+        ##para evitar ceros al inicio y al final debido a la convolución, agrego padding
+        ##pongo los primeros window_size valores de la señal al inicio y los últimos window_size valores al final
+        padding_start = self.data["dst_pt"].values[0:window_size]
+        padding_end = self.data["dst_pt"].values[-window_size:]
+        padded_data = np.concatenate([padding_start, self.data["dst_pt"].values, padding_end])
+        ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
+        return ma_full[window_size: -window_size]

-    def
-
+    def getProbasMA(self, window_size=104, mode='same'):
+        """
+        Función para calcular la media móvil de una serie temporal.
+        data: numpy array con los datos de la serie temporal
+        window_size: tamaño de la ventana para calcular la media móvil
+        """
+        ##para evitar ceros al inicio y al final debido a la convolución, agrego padding
+        ##copio los primeros y últimos valores usando la misma cantidad que window_size
+        ##pongo los primeros window_size valores de la señal al inicio y los últimos window_size valores al final
+        padding_start = self.data["tag_seed_probas1"].values[0:window_size]
+        padding_end = self.data["tag_seed_probas1"].values[-window_size:]
+        padded_data = np.concatenate([padding_start, self.data["tag_seed_probas1"].values, padding_end])
+        ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
+        return ma_full[window_size: -window_size]
+
+    def getMA(self, data: np.array, window_size=104, mode='same'):
+        """
+        Función para calcular la media móvil de una serie temporal.
+        data: numpy array con los datos de la serie temporal
+        window_size: tamaño de la ventana para calcular la media móvil
+        """
+        ##para evitar ceros al inicio y al final debido a la convolución, agrego padding
+        ##copio los primeros y últimos valores usando la misma cantidad que window_size
+        ##pongo los primeros window_size valores de la señal al inicio y los últimos window_size valores al final
+        padding_start = data[0:window_size]
+        padding_end = data[-window_size:]
+        padded_data = np.concatenate([padding_start, data, padding_end])
+        ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
+        return ma_full[window_size: -window_size]

     def to_time_obj(self,t):
         """
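The three helpers added above (`getSensorMA`, `getProbasMA`, `getMA`) share one recipe: pad the series with its first and last `window_size` samples, convolve in 'same' mode, then cut the padding off so the result keeps the original length without the attenuated edges a plain convolution produces. A standalone numpy sketch of that recipe (illustrative data; the clamping line mirrors how the 3.0.0 callers handle short series):

```python
import numpy as np

def padded_moving_average(x: np.ndarray, window_size: int) -> np.ndarray:
    """Moving average that pads with the first/last window_size samples to avoid edge roll-off."""
    window_size = min(window_size, len(x))  # clamp for short series, as the 3.0.0 callers do
    padded = np.concatenate([x[:window_size], x, x[-window_size:]])
    kernel = np.ones(window_size) / window_size
    smoothed = np.convolve(padded, kernel, mode="same")
    return smoothed[window_size:-window_size]

x = np.array([1.0, 1.0, 10.0, 1.0, 1.0, 1.0])
print(padded_moving_average(x, window_size=3))  # same length as x, no zero-padded edges
```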
@@ -299,6 +364,13 @@ class FeaturesResume():
             mask &= ~tod.eq(t1)

         filtered = df[mask]
+        #me quedo con los indices donde se cumpla df[mask] y aplico a self.raw_data de origen
+
+        ##chequeo que filtered no esté vacio, sino retorno None
+        if filtered.empty or len(filtered) < 10:
+            logger.warning("El filtro de tiempo resultó en un conjunto vacío.")
+            print("El filtro de tiempo resultó en un conjunto vacío.")
+            return None

         #si inplace, actualizo filtro raw_data y retorno un nuevo objeto FeaturesResume, sino retorno los datos filtrados
         if inplace:
@@ -312,7 +384,9 @@ class FeaturesResume():
                 updateTagSeedling = self.updateTagSeedling,
                 kwargs_fmcreator = self.kwargs_fmcreator,
                 kwargs_classifier = self.kwargs_classifier,
-                timeFilter = None # ya apliqué el filtro
+                timeFilter = None, # ya apliqué el filtro
+                outliers = self.outliers,
+                window_size_ma=self.window_size_ma,
                 )

             return new_fr
@@ -374,7 +448,7 @@ class FeaturesResume():
     ):
         """
         Genera un gráfico de comparación entre dos características en ejes y diferentes.
-
+        Se puede elegir si cada eje usa línea, solo marcadores, o ambos.

         Args:
         - feature1, feature2: nombres de columnas en self.data.
@@ -442,15 +516,16 @@ class FeaturesResume():
         lines2, labels2 = ax2.get_legend_handles_labels()
         ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')

-        if show:
-            plt.show()
-
         if save:
             if filename is not None:
                 plt.savefig(filename)
             else:
                 plt.savefig(f"feature_comparison_{feature1}_{feature2}.png")
-
+
+        if show:
+            plt.show()
+        else:
+            plt.close(fig) # Cierra la figura para liberar memoria

     ##gráfico de dispersión para comparar la distribución de 0s y 1s
     def plot_geo_compare(
@@ -464,6 +539,9 @@ class FeaturesResume():
         s: float = 10.0,
         alpha: float = 0.8,
         equal_aspect: bool = True,
+        save = False,
+        show = True,
+        filename = None,
         # ---- NUEVO: control de colorbar y límites de color ----
         vmin: float | None = None,
         vmax: float | None = None,
@@ -541,7 +619,15 @@ class FeaturesResume():
         ax.set_xlim(xmin, xmax)
         ax.set_ylim(ymin, ymax)

-
+
+        if save:
+            if filename is not None:
+                plt.savefig(filename)
+            else:
+                plt.savefig(f"geo_compare_{feature_col}.png")
+        if show:
+            plt.show()
+        plt.close(fig) # Cierra la figura para liberar memoria

 if __name__ == "__main__":
     import json
@@ -554,20 +640,33 @@ if __name__ == "__main__":
     pkg_logger = logging.getLogger("sarapy.stats")
     pkg_logger.setLevel(logging.ERROR)

-
-
-
-
-
-
-
+    ## argumentos de PlantinFMCreator
+    kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
+                        "dist_mismo_lugar":0.2, "max_dist":100,
+                        "umbral_ratio_dCdP":2, "deltaO_medio":4,
+                        "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
+                        "deltaO_ma": True, "deltaO_ma_window": 26}
+
+
+    ##argumentos del método PlantinClassifier.clasiffy()
+    kwargs_classifier = {"proba_threshold":0.45,
+                         "use_proba_ma":False,
+                         "proba_ma_window":10,
+                         "update_samePlace":True,
+                         "update_dstpt":True,
+                         "umbral_proba_dstpt":0.5,
+                         "umbral_bajo_dstpt":1.5,
+                         "use_ma":True,
+                         "dstpt_ma_window":62,
+                         "use_min_dstpt":False,
+                         "factor":0.1,
+
                          "useRatioStats":False,
                          "std_weight":1.,
                          "useDistancesStats":False,
-                         "ratio_dcdp_umbral":0.
+                         "ratio_dcdp_umbral":0.1,
                          "dist_umbral":0.5,
-
-                         "umbral_proba_dstpt":0.7}
+                         }


     time_filter=None
@@ -590,14 +689,19 @@ if __name__ == "__main__":

     merged_data = dataMerging(historical_data, post_data, raw_data, nodoName=nodo,newColumns=False, asDF=False)

-
-
-
-
-
-
-
-
+    outliers = {
+        "ratio_dCdP": (-5, 2),
+        "deltaO": (0, 3600),
+        "time_ac": (0, 100),
+        "precision": (0, 5000),
+        "distances": (0, 100)
+    }
+
+    fr = FeaturesResume(merged_data, info = nodo, filtrar=None, outliers=outliers,
+                        kwargs_classifier=kwargs_classifier,
+                        kwargs_fmcreator=kwargs_fmcreator,
+                        updateTagSeedling=True, timeFilter=None,
+                        window_size_ma=62)

     print(fr.data["tag_seedling"].value_counts(normalize=True))
     print(fr.getResume(to="all"))
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/dataProcessing/OpsProcessor.py

@@ -43,7 +43,8 @@ class OpsProcessor():
                 kwargs_plclass[key] = value

         fmcreator_map = {"imputeDistances", "distanciaMedia", "umbral_precision",
-                         "dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio"
+                         "dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio",
+                         "impute_ratiodcdp", "umbral_impute_ratiodcdp", "deltaO_ma", "deltaO_ma_window"}
         fmcreator_kargs = {}
         ##recorro kwargs y usando fmcreator_map creo un nuevo diccionario con los valores que se pasaron
         for key, value in kwargs.items():
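The extended `fmcreator_map` above is used to route a mixed `**kwargs` dictionary: keys in the set go to `PlantinFMCreator`, the rest stay with the classifier. A small, self-contained sketch of that routing idea (assumed behavior based on this hunk; variable names are illustrative):

```python
# Route a mixed kwargs dict into the subset understood by PlantinFMCreator.
fmcreator_map = {"imputeDistances", "distanciaMedia", "umbral_precision",
                 "dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio",
                 "impute_ratiodcdp", "umbral_impute_ratiodcdp", "deltaO_ma", "deltaO_ma_window"}

kwargs = {"imputeDistances": True, "deltaO_ma_window": 26, "proba_threshold": 0.4}
fmcreator_kwargs = {k: v for k, v in kwargs.items() if k in fmcreator_map}
other_kwargs = {k: v for k, v in kwargs.items() if k not in fmcreator_map}

print(fmcreator_kwargs)  # {'imputeDistances': True, 'deltaO_ma_window': 26}
print(other_kwargs)      # {'proba_threshold': 0.4}
```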
@@ -188,7 +189,7 @@ class OpsProcessor():
         logging.debug(f"Número de operaciones para el nodo {ID_NPDP}: {len(operations)}")
         features, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(operations)
         logging.debug(f"Features shape for {ID_NPDP}: {features.shape}")
-        classified_ops, classifications_probas = self._plantin_classifier.classify(features, dst_pt, inest_pt, **
+        classified_ops, classifications_probas = self._plantin_classifier.classify(features, dst_pt, inest_pt, **kwargs)
         logging.debug(f"Classified operations shape for {ID_NPDP}: {classified_ops.shape}")

         ##chequeo si first_day_op_classified es True, si es así, no se considera la primera fila de las classified_ops
@@ -306,37 +307,50 @@ if __name__ == "__main__":
     import logging

     ## argumentos de PlantinFMCreator
-
-
-
+    kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
+                        "dist_mismo_lugar":0.2, "max_dist":100,
+                        "umbral_ratio_dCdP":2, "deltaO_medio":4,
+                        "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
+                        "deltaO_ma": True, "deltaO_ma_window": 26}


-
-    kwargs_classifier = {"proba_threshold":0.
-                         "
-                         "
+    ##argumentos del método PlantinClassifier.clasiffy()
+    kwargs_classifier = {"proba_threshold":0.4,
+                         "use_proba_ma":False,
+                         "proba_ma_window":10,
+                         "update_samePlace":True,
+                         "update_dstpt":True,
+                         "umbral_proba_dstpt":0.5,
+                         "umbral_bajo_dstpt":1.5,
+                         "use_ma":True,
+                         "dstpt_ma_window":62,
+                         "use_min_dstpt":False,
+                         "factor":0.1,
+
                          "useRatioStats":False,
                          "std_weight":1.,
                          "useDistancesStats":False,
-                         "ratio_dcdp_umbral":0.
+                         "ratio_dcdp_umbral":0.1,
                          "dist_umbral":0.5,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                         }
+
+    nodos = ['UPM006N','UPM007N','UPM034N','UPM037N','UPM038N','UPM039N','UPM045N','UPM041N',
+             'UPM048N','UPM105N','UPM107N']
+    for nodo in nodos:
+        print(f"**************** Procesando nodo: {nodo} ***********************")
+        historical_data_path = f"examples\\2025-08-09\\{nodo}\\historical-data.json"
+        with open(historical_data_path, 'r') as file:
+            samples = json.load(file)
+
+        op = OpsProcessor(classifier_file='modelos\\pipeline_rf.pkl',
+                          regresor_file='modelos\\regresor.pkl', poly_features_file='modelos\\poly_features.pkl',
+                          **kwargs_fmcreator)
+
+        ops_clasificadas = op.processOperations(samples, **kwargs_classifier)
+        probas = op.classifications_probas
+        # print(probas[:3])
+        # print(ops_clasificadas[:3])
+        df_ops_clasificadas = pd.DataFrame(ops_clasificadas)
+
+        print(df_ops_clasificadas.describe())
+        print(f"***************************************************************")
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/dataProcessing/TLMSensorDataProcessor.py

@@ -39,6 +39,8 @@ class TLMSensorDataProcessor():
         obj[:] -> todo
         obj[["col1"], :50] -> columna col1, primeras 50 filas
         """
+        ##chqueo que se tengan datos, sino retorno []
+
         if isinstance(key, tuple): ##reviso si es una tupla
             ##se supone que key es una tupla de la forma (cols, rows)
             if len(key) != 2:
@@ -73,11 +75,11 @@ if __name__ == "__main__":
     import json
     from sarapy.preprocessing import TransformInputData

-    historical_data_path = "examples
+    historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
     with open(historical_data_path, 'r') as file:
         historical_data = json.load(file)

-    inputData_transformer = TransformInputData
+    inputData_transformer = TransformInputData()
     data = inputData_transformer.transform(historical_data)

     tlm_processor = TLMSensorDataProcessor(data=data)
@@ -87,4 +89,5 @@ if __name__ == "__main__":
     tlm_processor[["id_db_dw", "id_db_h"], :5]#.shape
     tlm_processor.keys
     tlm_processor["longitud",:]
+    print(tlm_processor["date_oprc",:][:5])

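The docstring above documents indexing such as `obj[:]` and `obj[["col1"], :50]`. The following is a toy, self-contained sketch of how a `(cols, rows)`-style `__getitem__` can be implemented; it is not TLMSensorDataProcessor itself and assumes the data is held as a dict of numpy arrays:

```python
import numpy as np

class ColumnStore:
    """Toy container illustrating obj["col", rows] / obj[["c1", "c2"], rows] indexing."""
    def __init__(self, data):
        self.data = {k: np.asarray(v) for k, v in data.items()}

    def __getitem__(self, key):
        if isinstance(key, tuple) and len(key) == 2:
            cols, rows = key
            if isinstance(cols, str):
                return self.data[cols][rows]
            return np.column_stack([self.data[c][rows] for c in cols])
        # plain slice -> all columns stacked side by side
        return np.column_stack([v[key] for v in self.data.values()])

store = ColumnStore({"longitud": [1.0, 2.0, 3.0], "latitud": [4.0, 5.0, 6.0]})
print(store["longitud", :2])              # [1. 2.]
print(store[["longitud", "latitud"], :])  # 3x2 array
```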
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/FertilizerTransformer.py

@@ -1,4 +1,5 @@
 import pickle
+import logging
 from sarapy.dataProcessing import TLMSensorDataProcessor

 class FertilizerTransformer:
@@ -13,22 +14,23 @@ class FertilizerTransformer:
         - regresor: Regresor que transforma los valores de distorsión a gramos.
         - poly_features: Grado del polinomio a utilizar en la transformación de los datos.
         """
+        self.logger = logging.getLogger("FertilizerTransformer")
         ##cargo el regresor con pickle. Usamos try para capturar el error FileNotFoundError
         try:
             with open(regresor_file, 'rb') as file:
                 self._regresor = pickle.load(file)
-
+            self.logger.info("Regresor cargado con éxito.")
         except FileNotFoundError:
-
+            self.logger.error("El archivo no se encuentra en el directorio actual.")

         ##cargo las características polinómicas con pickle. Usamos try para capturar el error FileNotFoundError
         try:
             with open(poly_features_file, 'rb') as file:
                 self._poly_features = pickle.load(file)
-
+            self.logger.info("Características polinómicas cargadas con éxito.")
         except FileNotFoundError:
-
-
+            self.logger.error("El archivo no se encuentra en el directorio actual.")
+
         self.fertilizer_grams = None ##cuando no se ha transformado ningún dato, se inicializa en None

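The constructor now reports through a named logger (`FertilizerTransformer`; the other modules touched in this release use `PlantinFMCreator` and `PlantinClassifier`). A minimal sketch, outside the package, of how a caller could surface or silence those messages:

```python
import logging

# Show INFO/ERROR messages emitted by the named loggers used in this diff.
logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s %(name)s: %(message)s")

# Or quiet a single component while keeping the rest verbose.
logging.getLogger("FertilizerTransformer").setLevel(logging.ERROR)
```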
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/PlantinClassifier.py

@@ -28,11 +28,13 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
             self.logger.info("Clasificador cargado con éxito.")
         except FileNotFoundError:
             self.logger.error("El archivo no se encuentra en el directorio actual.")
+            raise

-    def classify(self, feature_matrix, dst_pt, inest_pt,
+    def classify(self, feature_matrix, dst_pt, inest_pt,
+                 proba_threshold = 0.45, use_proba_ma = False, proba_ma_window = 10,
                  update_samePlace:bool = True, update_dstpt: bool = True,
-                 umbral_proba_dstpt = 0.
-                 use_ma =
+                 umbral_proba_dstpt = 0.5, umbral_bajo_dstpt = 1.5,
+                 use_ma = True, dstpt_ma_window = 62,
                  use_min_dstpt = False, factor = 0.1, **kwargs):
         """Genera la clasificación de las operaciones para plantines.

@@ -50,13 +52,25 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
         """

         if use_ma:
-            dst_pt
+            if dst_pt.shape[0] < dstpt_ma_window:
+                self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil.")
+                dst_pt = self.get_dstpt_MA(dst_pt, window_size=dst_pt.shape[0], mode='same')
+            else:
+                dst_pt = self.get_dstpt_MA(dst_pt, window_size=dstpt_ma_window, mode='same')

         self.clasificaciones = self._pipeline.predict(feature_matrix)
         self.classifications_probas = self._pipeline.predict_proba(feature_matrix)

-
-
+        if use_proba_ma:
+            if proba_ma_window >= self.classifications_probas.shape[0]:
+                self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil a las probabilidades.")
+                probas_ma = self.get_probas_MA(self.classifications_probas, window_size=self.classifications_probas.shape[0], mode='same')
+            else:
+                probas_ma = self.get_probas_MA(self.classifications_probas, window_size=proba_ma_window, mode='same')
+            self.clasificaciones[probas_ma[:,1] < proba_threshold] = 0
+        else:
+            # self.clasificaciones = self._pipeline.classes_[np.argmax(self.classifications_probas, axis=1)]
+            self.clasificaciones[self.classifications_probas[:,1] < proba_threshold] = 0

         if update_samePlace:
             self.grouped_ops = self.groupOpsSamePlace(feature_matrix, **kwargs)
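The `classify()` change above keeps the pipeline's hard predictions but demotes to 0 any operation whose class-1 probability (optionally smoothed with a moving average) falls below `proba_threshold`. A self-contained numpy sketch of that thresholding step, assuming a two-column `predict_proba`-style array with made-up values:

```python
import numpy as np

# Columns are [P(class 0), P(class 1)], as returned by predict_proba.
probas = np.array([[0.80, 0.20],
                   [0.30, 0.70],
                   [0.55, 0.45],
                   [0.10, 0.90]])
labels = np.array([0, 1, 1, 1])  # hard predictions from the pipeline

proba_threshold = 0.5
labels[probas[:, 1] < proba_threshold] = 0  # demote low-confidence positives
print(labels)  # [0 1 0 1]
```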
@@ -69,7 +83,7 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):

         return self.clasificaciones, self.classifications_probas

-    def groupOpsSamePlace(self, X, useRatioStats =
+    def groupOpsSamePlace(self, X, useRatioStats = False, std_weight=1, useDistancesStats = False,
                           ratio_dcdp_umbral=0.1, dist_umbral=0.5):
         """
         Función que agrupa las operaciones que se realizaron en el mismo lugar o que sean de limpieza.
@@ -141,7 +155,7 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
         return new_labels

     def updateLabelsFromDSTPT(self, labels, dst_pt, inest_pt,
-                              umbral_bajo_dstpt = 4, umbral_proba_dstpt = 0.
+                              umbral_bajo_dstpt = 4, umbral_proba_dstpt = 0.5,
                               use_min_dstpt = False, factor = 0.1):
         """
         Función para actualizar las etiquetas de las operaciones que tengan distorsiones de plantín.
@@ -164,7 +178,25 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
         data: numpy array con los datos de la serie temporal
         window_size: tamaño de la ventana para calcular la media móvil
         """
-        return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
+        # return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
+        padding_start = dst_pt[0:window_size]
+        padding_end = dst_pt[-window_size:]
+        padded_data = np.concatenate([padding_start, dst_pt, padding_end])
+        ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
+        return ma_full[window_size: -window_size]
+
+    def get_probas_MA(self, probas, window_size=104, mode='same'):
+        """
+        Función para calcular la media móvil de una serie temporal.
+        data: numpy array con los datos de la serie temporal
+        window_size: tamaño de la ventana para calcular la media móvil
+        """
+        # return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
+        padding_start = probas[0:window_size, :]
+        padding_end = probas[-window_size:, :]
+        padded_data = np.vstack([padding_start, probas, padding_end])
+        ma_full = np.apply_along_axis(lambda m: np.convolve(m, np.ones(window_size)/window_size, mode='same'), axis=0, arr=padded_data)
+        return ma_full[window_size: -window_size, :]

 if __name__ == "__main__":
     import os
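`get_probas_MA` above applies the same padded moving average to each probability column independently. A self-contained numpy sketch of that column-wise version, with illustrative input:

```python
import numpy as np

def columnwise_moving_average(probas: np.ndarray, window_size: int) -> np.ndarray:
    """Padded moving average applied to each column of a 2D array."""
    padded = np.vstack([probas[:window_size], probas, probas[-window_size:]])
    kernel = np.ones(window_size) / window_size
    smoothed = np.apply_along_axis(lambda col: np.convolve(col, kernel, mode="same"),
                                   axis=0, arr=padded)
    return smoothed[window_size:-window_size]

probas = np.column_stack([np.linspace(1, 0, 8), np.linspace(0, 1, 8)])
out = columnwise_moving_average(probas, window_size=3)
print(out.shape)  # (8, 2): same shape as the input
```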
@@ -176,22 +208,35 @@ if __name__ == "__main__":
     import json


-
-
-
-
-
-
-
+    ## argumentos de PlantinFMCreator
+    kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
+                        "dist_mismo_lugar":0.2, "max_dist":100,
+                        "umbral_ratio_dCdP":2, "deltaO_medio":4,
+                        "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
+                        "deltaO_ma": True, "deltaO_ma_window": 26}
+
+
+    ##argumentos del método PlantinClassifier.clasiffy()
+    kwargs_classifier = {"proba_threshold":0.45,
+                         "use_proba_ma":False,
+                         "proba_ma_window":10,
+                         "update_samePlace":True,
+                         "update_dstpt":True,
+                         "umbral_proba_dstpt":0.5,
+                         "umbral_bajo_dstpt":1.5,
+                         "use_ma":True,
+                         "dstpt_ma_window":62,
+                         "use_min_dstpt":False,
+                         "factor":0.1,
+
                          "useRatioStats":False,
                          "std_weight":1.,
                          "useDistancesStats":False,
-                         "ratio_dcdp_umbral":0.
+                         "ratio_dcdp_umbral":0.1,
                          "dist_umbral":0.5,
-
-                         "umbral_proba_dstpt":0.85}
+                         }

-    historical_data_path = "examples\\2025-09-04\\
+    historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
     with open(historical_data_path, 'r') as file:
         samples = json.load(file)

@@ -210,6 +255,3 @@ if __name__ == "__main__":
     print("primeras probabilidades", probas[100:105])
     print("primeras distorsiones", dst_pt[100:105])
     print("primeras inestabilidades", inest_pt[100:105])
-
-    # print(rf_clf_wu.classify(X, dst_pt, inest_pt, **kwargs_classifier))
-
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/mlProcessors/PlantinFMCreator.py

@@ -1,4 +1,5 @@
 ###Documentación en https://github.com/lucasbaldezzari/sarapy/blob/main/docs/Docs.md
+import logging
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin
 from sarapy.dataProcessing import TLMSensorDataProcessor, TimeSeriesProcessor, GeoProcessor
@@ -20,7 +21,9 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):

     def __init__(self, imputeDistances = True, distanciaMedia:float = 1.8,
                  umbral_precision:float = 0.3, dist_mismo_lugar = 0.0, max_dist = 100,
-                 umbral_ratio_dCdP:float = 0.5, deltaO_medio = 4, baseDeltaP = 10
+                 umbral_ratio_dCdP:float = 0.5, deltaO_medio = 4, baseDeltaP = 10,
+                 impute_ratiodcdp = False, umbral_impute_ratiodcdp = -0.8,
+                 deltaO_ma = False, deltaO_ma_window = 26):
         """Inicializa la clase FMCreator.

         Args:
@@ -30,6 +33,7 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         - umbral_ratio_dCdP: Umbral para el ratio entre el delta de caminata y el delta de pico abierto.
         - deltaO_medio: delta de operación medio entre operaciones.
         """
+        self.logger = logging.getLogger("PlantinFMCreator")

         self.is_fitted = False
         self.imputeDistances = imputeDistances
@@ -40,9 +44,10 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         self.umbral_ratio_dCdP = umbral_ratio_dCdP
         self.deltaO_medio = deltaO_medio
         self.baseDeltaP = baseDeltaP
-
-
-        self.
+        self.impute_ratiodcdp = impute_ratiodcdp
+        self.umbral_impute_ratiodcdp = umbral_impute_ratiodcdp
+        self.deltaO_ma = deltaO_ma
+        self.deltaO_ma_window = deltaO_ma_window

     def fit(self, X: np.array, y=None)-> np.array:
         """Fittea el objeto
@@ -73,7 +78,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         tpDP = timeProcessor._dataPositions
         geoprocessor = GeoProcessor.GeoProcessor()

-
         date_oprc = self.tlmDataProcessor["date_oprc",:] #datos de fecha y hora de operación
         time_ac = self.tlmDataProcessor["TIME_AC",:]/self.baseDeltaP #datos de fecha y hora de operación en formato timestamp
         lats = self.tlmDataProcessor["latitud",:] #latitudes de las operaciones
@@ -88,7 +92,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         timeData = np.hstack((date_oprc.reshape(-1,1),time_ac.reshape(-1, 1)))

         self._timeDeltas = timeProcessor.fit_transform(timeData)
-        # print(np.median(self._timeDeltas[:,tpDP["ratio_dCdP"]]))

         ##fitteamos geoprocessor con las latitudes y longitudes
         points = np.hstack((lats.reshape(-1,1),longs.reshape(-1,1)))
@@ -98,6 +101,22 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         self.featureMatrix = np.vstack((self._timeDeltas[:,tpDP["deltaO"]],
                                         self._timeDeltas[:,tpDP["ratio_dCdP"]],
                                         self._distances)).T
+
+        if self.impute_ratiodcdp:
+            ratio_dcdp_median = np.median(self.featureMatrix[:, 1])
+            self.featureMatrix[:, 1] = np.where(self.featureMatrix[:, 1] < self.umbral_impute_ratiodcdp, ratio_dcdp_median, self.featureMatrix[:, 1])
+
+        if self.deltaO_ma:
+            data = self.featureMatrix[:, 0]
+            if self.deltaO_ma_window >= len(data):
+                self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil a deltaO.")
+                self.deltaO_ma_window = len(data)
+
+            padding_start = data[0:self.deltaO_ma_window]
+            padding_end = data[-self.deltaO_ma_window:]
+            padded_data = np.concatenate([padding_start, data, padding_end])
+            ma_full = np.convolve(padded_data, np.ones(self.deltaO_ma_window)/self.deltaO_ma_window, mode='same')
+            self.featureMatrix[:, 0] = ma_full[self.deltaO_ma_window: - self.deltaO_ma_window]

         return self.featureMatrix, self.dst_pt, self.inest_pt

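With `impute_ratiodcdp` enabled, the code above replaces implausibly low `ratio_dCdP` values with the column median before the feature matrix is returned. A self-contained numpy sketch of that rule (made-up values; the -0.8 cut-off mirrors the constructor default):

```python
import numpy as np

ratio_dcdp = np.array([0.4, -3.0, 0.6, -0.9, 0.5])
umbral_impute_ratiodcdp = -0.8

median = np.median(ratio_dcdp)  # 0.4 for these values
imputed = np.where(ratio_dcdp < umbral_impute_ratiodcdp, median, ratio_dcdp)
print(imputed)  # [0.4 0.4 0.6 0.4 0.5] -> values below -0.8 replaced by the median
```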
@@ -130,12 +149,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         """Devuelve las distancias entre operaciones."""
         return self._distances

-    @property
-    def dataPositions(self):
-        """Devuelve el diccionario con la posición de los datos dentro del array devuelto por transform()."""
-        return self._dataPositions
-
-
 if __name__ == "__main__":
     import pandas as pd
     import json
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/preprocessing/TransformInputData.py

@@ -137,7 +137,7 @@ if __name__ == "__main__":
     import pandas as pd
     import json

-    historical_data_path = "examples\\2025-
+    historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
     with open(historical_data_path, 'r') as file:
         historical_data = json.load(file)
     df = pd.DataFrame(historical_data)
@@ -146,4 +146,5 @@ if __name__ == "__main__":
     data_positions = json.load(open("sarapy/preprocessing/telemetriaDataPosition.json", 'r'))
     transform_input_data = TransformInputData()
     transformed_data = transform_input_data.transform(historical_data)
-    print(transformed_data[:2])
+    print(transformed_data[:2])
+    print(transformed_data[0]["date_oprc"])
{sarapy-2.3.0 → sarapy-3.0.0}/sarapy/version.py

@@ -1,2 +1,2 @@
 ## Version of the package
-__version__ = "2.3.0"
+__version__ = "3.0.0"
{sarapy-2.3.0 → sarapy-3.0.0/sarapy.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sarapy
-Version: 2.3.0
+Version: 3.0.0
 Home-page: https://github.com/lucasbaldezzari/sarapy
 Author: Lucas Baldezzari
 Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>

@@ -19,6 +19,39 @@ Requires-Dist: geopy

 Library for processing SARAPICO project metadata of _AMG SA_.

+#### Version 3.0.0
+- Se mejora la forma de obtener valores de media movil para todas las variables en las que se usa.
+- Se corrigen bugs debido a nodos con pocas operaciones.
+- Se corrigen errores a la hora de pasar parámetros a los métodos de algunas clases.
+- Se configuran parámetros de fmcreator y plantin_classifier para el reetiquetado, los mismos son:
+
+        kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
+                            "dist_mismo_lugar":0.2, "max_dist":100,
+                            "umbral_ratio_dCdP":2, "deltaO_medio":4,
+                            "impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
+                            "deltaO_ma": True, "deltaO_ma_window": 26}
+
+
+        ##argumentos del método PlantinClassifier.clasiffy()
+        kwargs_classifier = {"proba_threshold":0.4,
+                             "use_proba_ma":False,
+                             "proba_ma_window":10,
+                             "update_samePlace":True,
+                             "update_dstpt":True,
+                             "umbral_proba_dstpt":0.5,
+                             "umbral_bajo_dstpt":1.5,
+                             "use_ma":True,
+                             "dstpt_ma_window":62,
+                             "use_min_dstpt":False,
+                             "factor":0.1,
+
+                             "useRatioStats":False,
+                             "std_weight":1.,
+                             "useDistancesStats":False,
+                             "ratio_dcdp_umbral":0.1,
+                             "dist_umbral":0.5,
+                             }
+
 #### Version 2.3.0

 - Se agregan funcionalidades.