sarapy 2.2.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarapy/analysis/FeaturesResume.py +722 -0
- sarapy/analysis/__init__.py +3 -0
- sarapy/dataProcessing/OpsProcessor.py +68 -33
- sarapy/dataProcessing/TLMSensorDataProcessor.py +5 -2
- sarapy/mlProcessors/FertilizerTransformer.py +7 -5
- sarapy/mlProcessors/PlantinClassifier.py +120 -31
- sarapy/mlProcessors/PlantinFMCreator.py +25 -12
- sarapy/mlProcessors/__init__.py +11 -0
- sarapy/preprocessing/TransformInputData.py +3 -2
- sarapy/preprocessing/__init__.py +11 -2
- sarapy/stats/__init__.py +13 -1
- sarapy/stats/stats.py +5 -6
- sarapy/utils/__init__.py +3 -0
- sarapy/utils/utils.py +172 -0
- sarapy/version.py +2 -2
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/METADATA +39 -1
- sarapy-3.0.0.dist-info/RECORD +29 -0
- sarapy/utils/amg_decoder.py +0 -125
- sarapy/utils/amg_ppk.py +0 -38
- sarapy/utils/getRawOperations.py +0 -20
- sarapy-2.2.0.dist-info/RECORD +0 -29
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/LICENCE +0 -0
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/WHEEL +0 -0
- {sarapy-2.2.0.dist-info → sarapy-3.0.0.dist-info}/top_level.txt +0 -0
|
@@ -29,7 +29,12 @@ class OpsProcessor():
|
|
|
29
29
|
- kwargs: Diccionario con los argumentos necesarios instanciar algunas clases.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
|
+
self.classifications_probas = None
|
|
32
33
|
plclass_map = {"classifier_file"}
|
|
34
|
+
self._operationsDict = {} ##diccionario de operarios con sus operaciones
|
|
35
|
+
self._platin_classifiedOperations = np.array([]) ##array con las operaciones clasificadas para plantin
|
|
36
|
+
self._fertilizer_classifiedOperations = np.array([]) ##array con las operaciones clasificadas para plantin
|
|
37
|
+
self._last_row_db = 0 ##indicador de la última fila de los datos extraidos de la base de datos histórica
|
|
33
38
|
|
|
34
39
|
kwargs_plclass = {}
|
|
35
40
|
##recorro kwargs y usando plclass_map creo un nuevo diccionario con los valores que se pasaron
|
|
@@ -38,15 +43,16 @@ class OpsProcessor():
|
|
|
38
43
|
kwargs_plclass[key] = value
|
|
39
44
|
|
|
40
45
|
fmcreator_map = {"imputeDistances", "distanciaMedia", "umbral_precision",
|
|
41
|
-
"dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio"
|
|
46
|
+
"dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio",
|
|
47
|
+
"impute_ratiodcdp", "umbral_impute_ratiodcdp", "deltaO_ma", "deltaO_ma_window"}
|
|
42
48
|
fmcreator_kargs = {}
|
|
43
49
|
##recorro kwargs y usando fmcreator_map creo un nuevo diccionario con los valores que se pasaron
|
|
44
50
|
for key, value in kwargs.items():
|
|
45
51
|
if key in fmcreator_map:
|
|
46
52
|
fmcreator_kargs[key] = value
|
|
47
53
|
|
|
48
|
-
self._plantin_classifier = PlantinClassifier
|
|
49
|
-
self.plantinFMCreator = PlantinFMCreator
|
|
54
|
+
self._plantin_classifier = PlantinClassifier(**kwargs_plclass)
|
|
55
|
+
self.plantinFMCreator = PlantinFMCreator(**fmcreator_kargs)
|
|
50
56
|
|
|
51
57
|
##mapa de argumentos para FertilizerTransformer
|
|
52
58
|
ft_map = {"regresor_file", "poly_features_file"}
|
|
@@ -56,16 +62,11 @@ class OpsProcessor():
|
|
|
56
62
|
if key in ft_map:
|
|
57
63
|
ft_kwargs[key] = value
|
|
58
64
|
|
|
59
|
-
self._ftfmcreator = FertilizerFMCreator
|
|
60
|
-
self._fertilizer_transformer = FertilizerTransformer
|
|
61
|
-
|
|
62
|
-
self.
|
|
63
|
-
|
|
64
|
-
self._fertilizer_classifiedOperations = np.array([]) ##array con las operaciones clasificadas para plantin
|
|
65
|
-
self._last_row_db = 0 ##indicador de la última fila de los datos extraidos de la base de datos histórica
|
|
66
|
-
self.transformInputData = TransformInputData.TransformInputData()
|
|
67
|
-
self.transformToOutputData = TransformToOutputData.TransformToOutputData()
|
|
68
|
-
|
|
65
|
+
self._ftfmcreator = FertilizerFMCreator()
|
|
66
|
+
self._fertilizer_transformer = FertilizerTransformer(**ft_kwargs)
|
|
67
|
+
self.transformInputData = TransformInputData()
|
|
68
|
+
self.transformToOutputData = TransformToOutputData()
|
|
69
|
+
|
|
69
70
|
def processOperations(self, data, **kwargs):
|
|
70
71
|
"""Método para procesar las operaciones de los operarios.
|
|
71
72
|
|
|
@@ -94,14 +95,14 @@ class OpsProcessor():
|
|
|
94
95
|
|
|
95
96
|
#Si tenemos nuevas operaciones, actualizamos el diccionario de operaciones
|
|
96
97
|
self.updateOperationsDict(newSample) #actualizamos diccionario interno de la clase
|
|
97
|
-
pl_clas = self.classifyForPlantin(**kwargs) #clasificamos las operaciones para plantín
|
|
98
|
+
pl_clas, self.classifications_probas = self.classifyForPlantin(**kwargs) #clasificamos las operaciones para plantín
|
|
98
99
|
|
|
99
100
|
#estimamos los gramos de fertilizante
|
|
100
101
|
ft_grams = self._fertilizer_transformer.transform(newSample)
|
|
101
102
|
logging.debug(f"Fertilizer grams shape: {ft_grams.shape}")
|
|
102
103
|
id_db_h_nums, id_db_dw_nums = self.getActualOperationsNumbers() #obtenemos los números de operaciones desde el diccionario de operaciones
|
|
103
104
|
logging.debug(f"ID_DB_H shape: {id_db_h_nums.shape}, ID_DB_DW shape: {id_db_dw_nums.shape}")
|
|
104
|
-
date_oprc = pd.DataFrame(newSample)["date_oprc"].values.reshape(-1, 1) ##extraigo las fechas de operación de la muestra
|
|
105
|
+
# date_oprc = pd.DataFrame(newSample)["date_oprc"].values.reshape(-1, 1) ##extraigo las fechas de operación de la muestra
|
|
105
106
|
timestamps = pd.DataFrame(newSample)["timestamp"].values.reshape(-1, 1) ##extraigo los timestamps de la muestra
|
|
106
107
|
|
|
107
108
|
return self.transformToOutputData.fit_transform(np.column_stack((timestamps,
|
|
@@ -167,7 +168,8 @@ class OpsProcessor():
|
|
|
167
168
|
|
|
168
169
|
key_classify_map = {"feature_matrix", "update_samePlace",
|
|
169
170
|
"useRatioStats", "std_weight", "useDistancesStats",
|
|
170
|
-
"ratio_dcdp_umbral", "dist_umbral"
|
|
171
|
+
"ratio_dcdp_umbral", "dist_umbral",
|
|
172
|
+
"umbral_bajo_dstpt", "umbral_proba_dstpt"}
|
|
171
173
|
|
|
172
174
|
##recorro kwargs y usando key_classify_map creo un nuevo diccionario con los valores que se pasaron
|
|
173
175
|
classify_kwargs = {}
|
|
@@ -187,7 +189,7 @@ class OpsProcessor():
|
|
|
187
189
|
logging.debug(f"Número de operaciones para el nodo {ID_NPDP}: {len(operations)}")
|
|
188
190
|
features, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(operations)
|
|
189
191
|
logging.debug(f"Features shape for {ID_NPDP}: {features.shape}")
|
|
190
|
-
classified_ops = self._plantin_classifier.classify(features, dst_pt, inest_pt, **
|
|
192
|
+
classified_ops, classifications_probas = self._plantin_classifier.classify(features, dst_pt, inest_pt, **kwargs)
|
|
191
193
|
logging.debug(f"Classified operations shape for {ID_NPDP}: {classified_ops.shape}")
|
|
192
194
|
|
|
193
195
|
##chequeo si first_day_op_classified es True, si es así, no se considera la primera fila de las classified_ops
|
|
@@ -201,7 +203,7 @@ class OpsProcessor():
|
|
|
201
203
|
|
|
202
204
|
self._operationsDict[ID_NPDP]["first_day_op_classified"] = True
|
|
203
205
|
|
|
204
|
-
return plantinClassifications
|
|
206
|
+
return plantinClassifications, classifications_probas
|
|
205
207
|
|
|
206
208
|
def updateLastOperations(self, ID_NPDPs_newOperations):
|
|
207
209
|
"""Método para actualizar la última operación de una muestra de operaciones en el diccionario de operaciones
|
|
@@ -303,19 +305,52 @@ if __name__ == "__main__":
|
|
|
303
305
|
import pandas as pd
|
|
304
306
|
import json
|
|
305
307
|
import logging
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
historical_data_path = "examples\\2025-08-04 copy\\UPM008N\\historical-data.json"
|
|
309
|
-
with open(historical_data_path, 'r') as file:
|
|
310
|
-
samples = json.load(file)
|
|
311
|
-
|
|
312
|
-
samples1 = samples
|
|
313
|
-
|
|
314
|
-
op = OpsProcessor(classifier_file='modelos\\pipeline_rf.pkl', imputeDistances = False,
|
|
315
|
-
regresor_file='modelos\\regresor.pkl', poly_features_file='modelos\\poly_features.pkl')
|
|
316
|
-
|
|
317
|
-
ops_clasificadas = op.processOperations(samples)
|
|
318
|
-
df_ops_clasificadas = pd.DataFrame(ops_clasificadas)
|
|
319
|
-
|
|
320
|
-
print(df_ops_clasificadas.describe())
|
|
321
308
|
|
|
309
|
+
## argumentos de PlantinFMCreator
|
|
310
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
311
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
312
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
313
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
314
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
##argumentos del método PlantinClassifier.clasiffy()
|
|
318
|
+
kwargs_classifier = {"proba_threshold":0.4,
|
|
319
|
+
"use_proba_ma":False,
|
|
320
|
+
"proba_ma_window":10,
|
|
321
|
+
"update_samePlace":True,
|
|
322
|
+
"update_dstpt":True,
|
|
323
|
+
"umbral_proba_dstpt":0.5,
|
|
324
|
+
"umbral_bajo_dstpt":1.5,
|
|
325
|
+
"use_ma":True,
|
|
326
|
+
"dstpt_ma_window":62,
|
|
327
|
+
"use_min_dstpt":False,
|
|
328
|
+
"factor":0.1,
|
|
329
|
+
|
|
330
|
+
"useRatioStats":False,
|
|
331
|
+
"std_weight":1.,
|
|
332
|
+
"useDistancesStats":False,
|
|
333
|
+
"ratio_dcdp_umbral":0.1,
|
|
334
|
+
"dist_umbral":0.5,
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
nodos = ['UPM006N','UPM007N','UPM034N','UPM037N','UPM038N','UPM039N','UPM045N','UPM041N',
|
|
338
|
+
'UPM048N','UPM105N','UPM107N']
|
|
339
|
+
for nodo in nodos:
|
|
340
|
+
print(f"**************** Procesando nodo: {nodo} ***********************")
|
|
341
|
+
historical_data_path = f"examples\\2025-08-09\\{nodo}\\historical-data.json"
|
|
342
|
+
with open(historical_data_path, 'r') as file:
|
|
343
|
+
samples = json.load(file)
|
|
344
|
+
|
|
345
|
+
op = OpsProcessor(classifier_file='modelos\\pipeline_rf.pkl',
|
|
346
|
+
regresor_file='modelos\\regresor.pkl', poly_features_file='modelos\\poly_features.pkl',
|
|
347
|
+
**kwargs_fmcreator)
|
|
348
|
+
|
|
349
|
+
ops_clasificadas = op.processOperations(samples, **kwargs_classifier)
|
|
350
|
+
probas = op.classifications_probas
|
|
351
|
+
# print(probas[:3])
|
|
352
|
+
# print(ops_clasificadas[:3])
|
|
353
|
+
df_ops_clasificadas = pd.DataFrame(ops_clasificadas)
|
|
354
|
+
|
|
355
|
+
print(df_ops_clasificadas.describe())
|
|
356
|
+
print(f"***************************************************************")
|
|
@@ -39,6 +39,8 @@ class TLMSensorDataProcessor():
|
|
|
39
39
|
obj[:] -> todo
|
|
40
40
|
obj[["col1"], :50] -> columna col1, primeras 50 filas
|
|
41
41
|
"""
|
|
42
|
+
##chqueo que se tengan datos, sino retorno []
|
|
43
|
+
|
|
42
44
|
if isinstance(key, tuple): ##reviso si es una tupla
|
|
43
45
|
##se supone que key es una tupla de la forma (cols, rows)
|
|
44
46
|
if len(key) != 2:
|
|
@@ -73,11 +75,11 @@ if __name__ == "__main__":
|
|
|
73
75
|
import json
|
|
74
76
|
from sarapy.preprocessing import TransformInputData
|
|
75
77
|
|
|
76
|
-
historical_data_path = "examples
|
|
78
|
+
historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
|
|
77
79
|
with open(historical_data_path, 'r') as file:
|
|
78
80
|
historical_data = json.load(file)
|
|
79
81
|
|
|
80
|
-
inputData_transformer = TransformInputData
|
|
82
|
+
inputData_transformer = TransformInputData()
|
|
81
83
|
data = inputData_transformer.transform(historical_data)
|
|
82
84
|
|
|
83
85
|
tlm_processor = TLMSensorDataProcessor(data=data)
|
|
@@ -87,4 +89,5 @@ if __name__ == "__main__":
|
|
|
87
89
|
tlm_processor[["id_db_dw", "id_db_h"], :5]#.shape
|
|
88
90
|
tlm_processor.keys
|
|
89
91
|
tlm_processor["longitud",:]
|
|
92
|
+
print(tlm_processor["date_oprc",:][:5])
|
|
90
93
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import pickle
|
|
2
|
+
import logging
|
|
2
3
|
from sarapy.dataProcessing import TLMSensorDataProcessor
|
|
3
4
|
|
|
4
5
|
class FertilizerTransformer:
|
|
@@ -13,22 +14,23 @@ class FertilizerTransformer:
|
|
|
13
14
|
- regresor: Regresor que transforma los valores de distorsión a gramos.
|
|
14
15
|
- poly_features: Grado del polinomio a utilizar en la transformación de los datos.
|
|
15
16
|
"""
|
|
17
|
+
self.logger = logging.getLogger("FertilizerTransformer")
|
|
16
18
|
##cargo el regresor con pickle. Usamos try para capturar el error FileNotFoundError
|
|
17
19
|
try:
|
|
18
20
|
with open(regresor_file, 'rb') as file:
|
|
19
21
|
self._regresor = pickle.load(file)
|
|
20
|
-
|
|
22
|
+
self.logger.info("Regresor cargado con éxito.")
|
|
21
23
|
except FileNotFoundError:
|
|
22
|
-
|
|
24
|
+
self.logger.error("El archivo no se encuentra en el directorio actual.")
|
|
23
25
|
|
|
24
26
|
##cargo las características polinómicas con pickle. Usamos try para capturar el error FileNotFoundError
|
|
25
27
|
try:
|
|
26
28
|
with open(poly_features_file, 'rb') as file:
|
|
27
29
|
self._poly_features = pickle.load(file)
|
|
28
|
-
|
|
30
|
+
self.logger.info("Características polinómicas cargadas con éxito.")
|
|
29
31
|
except FileNotFoundError:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
+
self.logger.error("El archivo no se encuentra en el directorio actual.")
|
|
33
|
+
|
|
32
34
|
self.fertilizer_grams = None ##cuando no se ha transformado ningún dato, se inicializa en None
|
|
33
35
|
|
|
34
36
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
###Documentación en https://github.com/lucasbaldezzari/sarapy/blob/main/docs/Docs.md
|
|
2
|
+
import logging
|
|
2
3
|
import numpy as np
|
|
3
4
|
from sklearn.base import BaseEstimator, TransformerMixin
|
|
4
5
|
from sklearn.pipeline import Pipeline
|
|
@@ -15,17 +16,26 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
15
16
|
- classifier_file: String con el nombre del archivo que contiene el clasificador entrenado. El archivo a cargar es un archivo .pkl.
|
|
16
17
|
"""
|
|
17
18
|
|
|
19
|
+
self.logger = logging.getLogger("PlantinClassifier")
|
|
20
|
+
|
|
21
|
+
self.classifications_probas = None
|
|
22
|
+
self.clasificaciones = None
|
|
23
|
+
|
|
18
24
|
#cargo el clasificador con pickle. Usamos try para capturar el error FileNotFoundError
|
|
19
25
|
try:
|
|
20
26
|
with open(classifier_file, 'rb') as file:
|
|
21
27
|
self._pipeline = pickle.load(file)
|
|
22
|
-
|
|
28
|
+
self.logger.info("Clasificador cargado con éxito.")
|
|
23
29
|
except FileNotFoundError:
|
|
24
|
-
|
|
25
|
-
|
|
30
|
+
self.logger.error("El archivo no se encuentra en el directorio actual.")
|
|
31
|
+
raise
|
|
32
|
+
|
|
26
33
|
def classify(self, feature_matrix, dst_pt, inest_pt,
|
|
34
|
+
proba_threshold = 0.45, use_proba_ma = False, proba_ma_window = 10,
|
|
27
35
|
update_samePlace:bool = True, update_dstpt: bool = True,
|
|
28
|
-
|
|
36
|
+
umbral_proba_dstpt = 0.5, umbral_bajo_dstpt = 1.5,
|
|
37
|
+
use_ma = True, dstpt_ma_window = 62,
|
|
38
|
+
use_min_dstpt = False, factor = 0.1, **kwargs):
|
|
29
39
|
"""Genera la clasificación de las operaciones para plantines.
|
|
30
40
|
|
|
31
41
|
- feature_matrix: Es un array con los datos (strings) provenientes de la base de datos histórica.
|
|
@@ -41,19 +51,39 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
41
51
|
NOTA: Estas características son necesarias en base a la última versión del modelo de clasificación.
|
|
42
52
|
"""
|
|
43
53
|
|
|
44
|
-
|
|
45
|
-
|
|
54
|
+
if use_ma:
|
|
55
|
+
if dst_pt.shape[0] < dstpt_ma_window:
|
|
56
|
+
self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil.")
|
|
57
|
+
dst_pt = self.get_dstpt_MA(dst_pt, window_size=dst_pt.shape[0], mode='same')
|
|
58
|
+
else:
|
|
59
|
+
dst_pt = self.get_dstpt_MA(dst_pt, window_size=dstpt_ma_window, mode='same')
|
|
60
|
+
|
|
61
|
+
self.clasificaciones = self._pipeline.predict(feature_matrix)
|
|
62
|
+
self.classifications_probas = self._pipeline.predict_proba(feature_matrix)
|
|
63
|
+
|
|
64
|
+
if use_proba_ma:
|
|
65
|
+
if proba_ma_window >= self.classifications_probas.shape[0]:
|
|
66
|
+
self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil a las probabilidades.")
|
|
67
|
+
probas_ma = self.get_probas_MA(self.classifications_probas, window_size=self.classifications_probas.shape[0], mode='same')
|
|
68
|
+
else:
|
|
69
|
+
probas_ma = self.get_probas_MA(self.classifications_probas, window_size=proba_ma_window, mode='same')
|
|
70
|
+
self.clasificaciones[probas_ma[:,1] < proba_threshold] = 0
|
|
71
|
+
else:
|
|
72
|
+
# self.clasificaciones = self._pipeline.classes_[np.argmax(self.classifications_probas, axis=1)]
|
|
73
|
+
self.clasificaciones[self.classifications_probas[:,1] < proba_threshold] = 0
|
|
46
74
|
|
|
47
75
|
if update_samePlace:
|
|
48
76
|
self.grouped_ops = self.groupOpsSamePlace(feature_matrix, **kwargs)
|
|
49
|
-
self.
|
|
77
|
+
self.clasificaciones = self.updateLabelsSamePlace(self.clasificaciones, self.grouped_ops)
|
|
50
78
|
|
|
51
79
|
if update_dstpt:
|
|
52
|
-
self.
|
|
80
|
+
self.clasificaciones = self.updateLabelsFromDSTPT(self.clasificaciones, dst_pt, inest_pt,
|
|
81
|
+
umbral_bajo_dstpt, umbral_proba_dstpt,
|
|
82
|
+
use_min_dstpt, factor)
|
|
53
83
|
|
|
54
|
-
return self.
|
|
55
|
-
|
|
56
|
-
def groupOpsSamePlace(self, X, useRatioStats =
|
|
84
|
+
return self.clasificaciones, self.classifications_probas
|
|
85
|
+
|
|
86
|
+
def groupOpsSamePlace(self, X, useRatioStats = False, std_weight=1, useDistancesStats = False,
|
|
57
87
|
ratio_dcdp_umbral=0.1, dist_umbral=0.5):
|
|
58
88
|
"""
|
|
59
89
|
Función que agrupa las operaciones que se realizaron en el mismo lugar o que sean de limpieza.
|
|
@@ -123,20 +153,50 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
123
153
|
new_labels[indexes[1:]] = 0
|
|
124
154
|
|
|
125
155
|
return new_labels
|
|
126
|
-
|
|
127
|
-
def updateLabelsFromDSTPT(self, labels, dst_pt, inest_pt,
|
|
156
|
+
|
|
157
|
+
def updateLabelsFromDSTPT(self, labels, dst_pt, inest_pt,
|
|
158
|
+
umbral_bajo_dstpt = 4, umbral_proba_dstpt = 0.5,
|
|
159
|
+
use_min_dstpt = False, factor = 0.1):
|
|
128
160
|
"""
|
|
129
161
|
Función para actualizar las etiquetas de las operaciones que tengan distorsiones de plantín.
|
|
130
162
|
"""
|
|
131
163
|
new_labels = labels.copy()
|
|
164
|
+
|
|
165
|
+
umbral_bajo_dstpt = min(dst_pt)*(1+factor) if use_min_dstpt else umbral_bajo_dstpt
|
|
132
166
|
|
|
133
|
-
##filtro
|
|
134
|
-
new_labels[(dst_pt <
|
|
167
|
+
##filtro
|
|
168
|
+
new_labels[(dst_pt < umbral_bajo_dstpt) & (inest_pt == 0)] = 0
|
|
135
169
|
|
|
136
|
-
##si inest_pt 1 es y
|
|
137
|
-
new_labels[(inest_pt == 1) & (self.
|
|
170
|
+
##si inest_pt 1 es y las probs son menores a umbral_proba_dstpt, entonces la operación es 0
|
|
171
|
+
new_labels[(inest_pt == 1) & (self.classifications_probas[:,1] < umbral_proba_dstpt)] = 0
|
|
138
172
|
|
|
139
173
|
return new_labels
|
|
174
|
+
|
|
175
|
+
def get_dstpt_MA(self, dst_pt, window_size=104, mode='same'):
|
|
176
|
+
"""
|
|
177
|
+
Función para calcular la media móvil de una serie temporal.
|
|
178
|
+
data: numpy array con los datos de la serie temporal
|
|
179
|
+
window_size: tamaño de la ventana para calcular la media móvil
|
|
180
|
+
"""
|
|
181
|
+
# return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
|
|
182
|
+
padding_start = dst_pt[0:window_size]
|
|
183
|
+
padding_end = dst_pt[-window_size:]
|
|
184
|
+
padded_data = np.concatenate([padding_start, dst_pt, padding_end])
|
|
185
|
+
ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
|
|
186
|
+
return ma_full[window_size: -window_size]
|
|
187
|
+
|
|
188
|
+
def get_probas_MA(self, probas, window_size=104, mode='same'):
|
|
189
|
+
"""
|
|
190
|
+
Función para calcular la media móvil de una serie temporal.
|
|
191
|
+
data: numpy array con los datos de la serie temporal
|
|
192
|
+
window_size: tamaño de la ventana para calcular la media móvil
|
|
193
|
+
"""
|
|
194
|
+
# return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
|
|
195
|
+
padding_start = probas[0:window_size, :]
|
|
196
|
+
padding_end = probas[-window_size:, :]
|
|
197
|
+
padded_data = np.vstack([padding_start, probas, padding_end])
|
|
198
|
+
ma_full = np.apply_along_axis(lambda m: np.convolve(m, np.ones(window_size)/window_size, mode='same'), axis=0, arr=padded_data)
|
|
199
|
+
return ma_full[window_size: -window_size, :]
|
|
140
200
|
|
|
141
201
|
if __name__ == "__main__":
|
|
142
202
|
import os
|
|
@@ -144,25 +204,54 @@ if __name__ == "__main__":
|
|
|
144
204
|
import numpy as np
|
|
145
205
|
from sarapy.preprocessing import TransformInputData
|
|
146
206
|
from sarapy.mlProcessors import PlantinFMCreator
|
|
147
|
-
import sarapy.utils.getRawOperations as getRawOperations
|
|
148
207
|
from sarapy.mlProcessors import PlantinClassifier
|
|
208
|
+
import json
|
|
209
|
+
|
|
149
210
|
|
|
150
|
-
|
|
151
|
-
|
|
211
|
+
## argumentos de PlantinFMCreator
|
|
212
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
213
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
214
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
215
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
216
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
##argumentos del método PlantinClassifier.clasiffy()
|
|
220
|
+
kwargs_classifier = {"proba_threshold":0.45,
|
|
221
|
+
"use_proba_ma":False,
|
|
222
|
+
"proba_ma_window":10,
|
|
223
|
+
"update_samePlace":True,
|
|
224
|
+
"update_dstpt":True,
|
|
225
|
+
"umbral_proba_dstpt":0.5,
|
|
226
|
+
"umbral_bajo_dstpt":1.5,
|
|
227
|
+
"use_ma":True,
|
|
228
|
+
"dstpt_ma_window":62,
|
|
229
|
+
"use_min_dstpt":False,
|
|
230
|
+
"factor":0.1,
|
|
231
|
+
|
|
232
|
+
"useRatioStats":False,
|
|
233
|
+
"std_weight":1.,
|
|
234
|
+
"useDistancesStats":False,
|
|
235
|
+
"ratio_dcdp_umbral":0.1,
|
|
236
|
+
"dist_umbral":0.5,
|
|
237
|
+
}
|
|
152
238
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
raw_data2 = pd.read_json(historical_data_path, orient="records").to_dict(orient="records")
|
|
239
|
+
historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
|
|
240
|
+
with open(historical_data_path, 'r') as file:
|
|
241
|
+
samples = json.load(file)
|
|
157
242
|
|
|
158
|
-
|
|
159
|
-
|
|
243
|
+
fmcreator = PlantinFMCreator(**kwargs_fmcreator)
|
|
244
|
+
tindata = TransformInputData()
|
|
245
|
+
raw_X = tindata.transform(samples)
|
|
160
246
|
|
|
161
247
|
X, dst_pt, inest_pt = fmcreator.fit_transform(raw_X)
|
|
162
248
|
|
|
163
|
-
|
|
164
|
-
rf_clf_wu = PlantinClassifier.PlantinClassifier(classifier_file='modelos\\pipeline_rf.pkl') ##wu = with update
|
|
249
|
+
rf_clf_wu = PlantinClassifier(classifier_file='modelos\\pipeline_rf.pkl')
|
|
165
250
|
|
|
166
|
-
|
|
167
|
-
print(
|
|
168
|
-
|
|
251
|
+
clasificaciones, probas = rf_clf_wu.classify(X, dst_pt, inest_pt, **kwargs_classifier)
|
|
252
|
+
print("media de clasificaciones", clasificaciones.mean())
|
|
253
|
+
print("media de probabilidades", probas.mean(axis=0), probas.std(axis=0), np.median(probas, axis=0))
|
|
254
|
+
print("primeras clasificaciones", clasificaciones[100:105])
|
|
255
|
+
print("primeras probabilidades", probas[100:105])
|
|
256
|
+
print("primeras distorsiones", dst_pt[100:105])
|
|
257
|
+
print("primeras inestabilidades", inest_pt[100:105])
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
###Documentación en https://github.com/lucasbaldezzari/sarapy/blob/main/docs/Docs.md
|
|
2
|
+
import logging
|
|
2
3
|
import numpy as np
|
|
3
4
|
from sklearn.base import BaseEstimator, TransformerMixin
|
|
4
5
|
from sarapy.dataProcessing import TLMSensorDataProcessor, TimeSeriesProcessor, GeoProcessor
|
|
@@ -20,7 +21,9 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
20
21
|
|
|
21
22
|
def __init__(self, imputeDistances = True, distanciaMedia:float = 1.8,
|
|
22
23
|
umbral_precision:float = 0.3, dist_mismo_lugar = 0.0, max_dist = 100,
|
|
23
|
-
umbral_ratio_dCdP:float = 0.5, deltaO_medio = 4, baseDeltaP = 10
|
|
24
|
+
umbral_ratio_dCdP:float = 0.5, deltaO_medio = 4, baseDeltaP = 10,
|
|
25
|
+
impute_ratiodcdp = False, umbral_impute_ratiodcdp = -0.8,
|
|
26
|
+
deltaO_ma = False, deltaO_ma_window = 26):
|
|
24
27
|
"""Inicializa la clase FMCreator.
|
|
25
28
|
|
|
26
29
|
Args:
|
|
@@ -30,6 +33,7 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
30
33
|
- umbral_ratio_dCdP: Umbral para el ratio entre el delta de caminata y el delta de pico abierto.
|
|
31
34
|
- deltaO_medio: delta de operación medio entre operaciones.
|
|
32
35
|
"""
|
|
36
|
+
self.logger = logging.getLogger("PlantinFMCreator")
|
|
33
37
|
|
|
34
38
|
self.is_fitted = False
|
|
35
39
|
self.imputeDistances = imputeDistances
|
|
@@ -40,9 +44,10 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
40
44
|
self.umbral_ratio_dCdP = umbral_ratio_dCdP
|
|
41
45
|
self.deltaO_medio = deltaO_medio
|
|
42
46
|
self.baseDeltaP = baseDeltaP
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
self.
|
|
47
|
+
self.impute_ratiodcdp = impute_ratiodcdp
|
|
48
|
+
self.umbral_impute_ratiodcdp = umbral_impute_ratiodcdp
|
|
49
|
+
self.deltaO_ma = deltaO_ma
|
|
50
|
+
self.deltaO_ma_window = deltaO_ma_window
|
|
46
51
|
|
|
47
52
|
def fit(self, X: np.array, y=None)-> np.array:
|
|
48
53
|
"""Fittea el objeto
|
|
@@ -73,7 +78,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
73
78
|
tpDP = timeProcessor._dataPositions
|
|
74
79
|
geoprocessor = GeoProcessor.GeoProcessor()
|
|
75
80
|
|
|
76
|
-
|
|
77
81
|
date_oprc = self.tlmDataProcessor["date_oprc",:] #datos de fecha y hora de operación
|
|
78
82
|
time_ac = self.tlmDataProcessor["TIME_AC",:]/self.baseDeltaP #datos de fecha y hora de operación en formato timestamp
|
|
79
83
|
lats = self.tlmDataProcessor["latitud",:] #latitudes de las operaciones
|
|
@@ -88,7 +92,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
88
92
|
timeData = np.hstack((date_oprc.reshape(-1,1),time_ac.reshape(-1, 1)))
|
|
89
93
|
|
|
90
94
|
self._timeDeltas = timeProcessor.fit_transform(timeData)
|
|
91
|
-
# print(np.median(self._timeDeltas[:,tpDP["ratio_dCdP"]]))
|
|
92
95
|
|
|
93
96
|
##fitteamos geoprocessor con las latitudes y longitudes
|
|
94
97
|
points = np.hstack((lats.reshape(-1,1),longs.reshape(-1,1)))
|
|
@@ -98,6 +101,22 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
98
101
|
self.featureMatrix = np.vstack((self._timeDeltas[:,tpDP["deltaO"]],
|
|
99
102
|
self._timeDeltas[:,tpDP["ratio_dCdP"]],
|
|
100
103
|
self._distances)).T
|
|
104
|
+
|
|
105
|
+
if self.impute_ratiodcdp:
|
|
106
|
+
ratio_dcdp_median = np.median(self.featureMatrix[:, 1])
|
|
107
|
+
self.featureMatrix[:, 1] = np.where(self.featureMatrix[:, 1] < self.umbral_impute_ratiodcdp, ratio_dcdp_median, self.featureMatrix[:, 1])
|
|
108
|
+
|
|
109
|
+
if self.deltaO_ma:
|
|
110
|
+
data = self.featureMatrix[:, 0]
|
|
111
|
+
if self.deltaO_ma_window >= len(data):
|
|
112
|
+
self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil a deltaO.")
|
|
113
|
+
self.deltaO_ma_window = len(data)
|
|
114
|
+
|
|
115
|
+
padding_start = data[0:self.deltaO_ma_window]
|
|
116
|
+
padding_end = data[-self.deltaO_ma_window:]
|
|
117
|
+
padded_data = np.concatenate([padding_start, data, padding_end])
|
|
118
|
+
ma_full = np.convolve(padded_data, np.ones(self.deltaO_ma_window)/self.deltaO_ma_window, mode='same')
|
|
119
|
+
self.featureMatrix[:, 0] = ma_full[self.deltaO_ma_window: - self.deltaO_ma_window]
|
|
101
120
|
|
|
102
121
|
return self.featureMatrix, self.dst_pt, self.inest_pt
|
|
103
122
|
|
|
@@ -130,12 +149,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
130
149
|
"""Devuelve las distancias entre operaciones."""
|
|
131
150
|
return self._distances
|
|
132
151
|
|
|
133
|
-
@property
|
|
134
|
-
def dataPositions(self):
|
|
135
|
-
"""Devuelve el diccionario con la posición de los datos dentro del array devuelto por transform()."""
|
|
136
|
-
return self._dataPositions
|
|
137
|
-
|
|
138
|
-
|
|
139
152
|
if __name__ == "__main__":
|
|
140
153
|
import pandas as pd
|
|
141
154
|
import json
|
sarapy/mlProcessors/__init__.py
CHANGED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .FertilizerFMCreator import FertilizerFMCreator
|
|
2
|
+
from .FertilizerTransformer import FertilizerTransformer
|
|
3
|
+
from .PlantinClassifier import PlantinClassifier
|
|
4
|
+
from .PlantinFMCreator import PlantinFMCreator
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"FertilizerFMCreator",
|
|
8
|
+
"FertilizerTransformer",
|
|
9
|
+
"PlantinClassifier",
|
|
10
|
+
"PlantinFMCreator",
|
|
11
|
+
]
|
|
@@ -137,7 +137,7 @@ if __name__ == "__main__":
|
|
|
137
137
|
import pandas as pd
|
|
138
138
|
import json
|
|
139
139
|
|
|
140
|
-
historical_data_path = "examples\\2025-
|
|
140
|
+
historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
|
|
141
141
|
with open(historical_data_path, 'r') as file:
|
|
142
142
|
historical_data = json.load(file)
|
|
143
143
|
df = pd.DataFrame(historical_data)
|
|
@@ -146,4 +146,5 @@ if __name__ == "__main__":
|
|
|
146
146
|
data_positions = json.load(open("sarapy/preprocessing/telemetriaDataPosition.json", 'r'))
|
|
147
147
|
transform_input_data = TransformInputData()
|
|
148
148
|
transformed_data = transform_input_data.transform(historical_data)
|
|
149
|
-
print(transformed_data[:2])
|
|
149
|
+
print(transformed_data[:2])
|
|
150
|
+
print(transformed_data[0]["date_oprc"])
|
sarapy/preprocessing/__init__.py
CHANGED
|
@@ -1,2 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
from .DistancesImputer import DistancesImputer
|
|
2
|
+
from .FertilizerImputer import FertilizerImputer
|
|
3
|
+
from .TransformInputData import TransformInputData
|
|
4
|
+
from .TransformToOutputData import TransformToOutputData
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"DistancesImputer",
|
|
8
|
+
"FertilizerImputer",
|
|
9
|
+
"TransformInputData",
|
|
10
|
+
"TransformToOutputData"
|
|
11
|
+
]
|
sarapy/stats/__init__.py
CHANGED
|
@@ -1 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
from .stats import *
|
|
2
|
+
|
|
3
|
+
__all__ = [
|
|
4
|
+
"getMA",
|
|
5
|
+
"probabilidadEmpirica",
|
|
6
|
+
"penalizacion",
|
|
7
|
+
"probSaturacion",
|
|
8
|
+
"estimarKDE",
|
|
9
|
+
"saturationProbability",
|
|
10
|
+
"movingProbability",
|
|
11
|
+
"resumen_sensor",
|
|
12
|
+
"detectar_secuencia_saturada"
|
|
13
|
+
]
|
sarapy/stats/stats.py
CHANGED
|
@@ -2,8 +2,7 @@ import numpy as np
|
|
|
2
2
|
from scipy.stats import skew, kurtosis, gaussian_kde
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import logging
|
|
5
|
-
|
|
6
|
-
logging.basicConfig(level=logging.DEBUG)
|
|
5
|
+
logger = logging.getLogger(__name__) # ← "sarapy.stats"
|
|
7
6
|
|
|
8
7
|
def getMA(data, window_size=104, mode='same'):
|
|
9
8
|
"""
|
|
@@ -63,7 +62,7 @@ def saturationProbability(distorsion_data, saturation_mode = "alto", umbrales =
|
|
|
63
62
|
if distorsion_data.shape[0] == 0:
|
|
64
63
|
raise ValueError("La distorsion_data no puede estar vacía.")
|
|
65
64
|
if distorsion_data.shape[0] < 50:
|
|
66
|
-
|
|
65
|
+
logger.warning("La distorsion_data tiene menos de 50 elementos. Los resultados pueden no ser representativos.")
|
|
67
66
|
|
|
68
67
|
ventana_filtered = distorsion_data.copy()
|
|
69
68
|
if saturation_mode == "bajo":
|
|
@@ -75,7 +74,7 @@ def saturationProbability(distorsion_data, saturation_mode = "alto", umbrales =
|
|
|
75
74
|
|
|
76
75
|
##chequeo si la ventana filtrada está vacía
|
|
77
76
|
if ventana_filtered.shape[0] == 0:
|
|
78
|
-
|
|
77
|
+
logger.warning("Ventana filtrada vacía. Se retornará 0.0.")
|
|
79
78
|
return 0.0
|
|
80
79
|
|
|
81
80
|
skew_val = skew(ventana_filtered)
|
|
@@ -84,13 +83,13 @@ def saturationProbability(distorsion_data, saturation_mode = "alto", umbrales =
|
|
|
84
83
|
pena = penalizacion(alpha, skew_val, beta, kurt_val)
|
|
85
84
|
##chequeo que pena no sea nan, sino reemplazo por 1
|
|
86
85
|
if np.isnan(pena):
|
|
87
|
-
|
|
86
|
+
logger.warning("La penalización es NaN. Se reemplazará por 1.")
|
|
88
87
|
pena = 1.0
|
|
89
88
|
# Probabilidad
|
|
90
89
|
proba_empirica = ventana_filtered.shape[0]/distorsion_data.shape[0]
|
|
91
90
|
prob_saturacion = proba_empirica * pena
|
|
92
91
|
|
|
93
|
-
|
|
92
|
+
logger.debug(f"Ventana filtrada: {ventana_filtered.shape[0]}, {distorsion_data.shape[0]}, {proba_empirica}, {pena}")
|
|
94
93
|
return prob_saturacion
|
|
95
94
|
|
|
96
95
|
def movingProbability(distorsion_data, window_size=104, **kwargs):
|
sarapy/utils/__init__.py
CHANGED