sarapy 2.3.0__py3-none-any.whl → 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sarapy/analysis/FeaturesResume.py +151 -47
- sarapy/dataProcessing/OpsProcessor.py +47 -32
- sarapy/dataProcessing/TLMSensorDataProcessor.py +5 -2
- sarapy/mlProcessors/FertilizerTransformer.py +139 -9
- sarapy/mlProcessors/PlantinClassifier.py +65 -23
- sarapy/mlProcessors/PlantinFMCreator.py +25 -12
- sarapy/preprocessing/TransformInputData.py +3 -2
- sarapy/version.py +1 -1
- {sarapy-2.3.0.dist-info → sarapy-3.1.0.dist-info}/METADATA +64 -12
- {sarapy-2.3.0.dist-info → sarapy-3.1.0.dist-info}/RECORD +16 -13
- {sarapy-2.3.0.dist-info → sarapy-3.1.0.dist-info}/WHEEL +1 -1
- sarapy-3.1.0.dist-info/top_level.txt +5 -0
- test/checking_regresor.py +162 -0
- test/probabilidades_test.py +77 -0
- test/test_import.py +5 -0
- sarapy-2.3.0.dist-info/top_level.txt +0 -1
- {sarapy-2.3.0.dist-info → sarapy-3.1.0.dist-info/licenses}/LICENCE +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import pickle
|
|
2
|
+
import logging
|
|
2
3
|
from sarapy.dataProcessing import TLMSensorDataProcessor
|
|
3
4
|
|
|
4
5
|
class FertilizerTransformer:
|
|
@@ -13,22 +14,23 @@ class FertilizerTransformer:
|
|
|
13
14
|
- regresor: Regresor que transforma los valores de distorsión a gramos.
|
|
14
15
|
- poly_features: Grado del polinomio a utilizar en la transformación de los datos.
|
|
15
16
|
"""
|
|
17
|
+
self.logger = logging.getLogger("FertilizerTransformer")
|
|
16
18
|
##cargo el regresor con pickle. Usamos try para capturar el error FileNotFoundError
|
|
17
19
|
try:
|
|
18
20
|
with open(regresor_file, 'rb') as file:
|
|
19
21
|
self._regresor = pickle.load(file)
|
|
20
|
-
|
|
22
|
+
self.logger.info("Regresor cargado con éxito.")
|
|
21
23
|
except FileNotFoundError:
|
|
22
|
-
|
|
24
|
+
self.logger.error("El archivo no se encuentra en el directorio actual.")
|
|
23
25
|
|
|
24
26
|
##cargo las características polinómicas con pickle. Usamos try para capturar el error FileNotFoundError
|
|
25
27
|
try:
|
|
26
28
|
with open(poly_features_file, 'rb') as file:
|
|
27
29
|
self._poly_features = pickle.load(file)
|
|
28
|
-
|
|
30
|
+
self.logger.info("Características polinómicas cargadas con éxito.")
|
|
29
31
|
except FileNotFoundError:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
+
self.logger.error("El archivo no se encuentra en el directorio actual.")
|
|
33
|
+
|
|
32
34
|
self.fertilizer_grams = None ##cuando no se ha transformado ningún dato, se inicializa en None
|
|
33
35
|
|
|
34
36
|
|
|
@@ -46,23 +48,151 @@ class FertilizerTransformer:
|
|
|
46
48
|
X_poly = self._poly_features.fit_transform(X.reshape(-1, 1))
|
|
47
49
|
self.fertilizer_grams = self._regresor.predict(X_poly)
|
|
48
50
|
|
|
51
|
+
##para valores de distorsión de 13+-0.3 y 15+-0.3, pongo los valores en 8 y 9 gramos, respectivamente
|
|
52
|
+
##uso máscara booleana para encontrar los índices
|
|
53
|
+
mask_13 = (X >= 12.7) & (X <= 13.3)
|
|
54
|
+
mask_15 = (X >= 14.7) & (X <= 15.3)
|
|
55
|
+
self.fertilizer_grams[mask_13] = 8
|
|
56
|
+
self.fertilizer_grams[mask_15] = 9
|
|
57
|
+
|
|
49
58
|
##retorno con shape (n,)
|
|
50
59
|
return self.fertilizer_grams.reshape(-1,)
|
|
51
60
|
|
|
52
61
|
if __name__ == "__main__":
|
|
53
62
|
import pandas as pd
|
|
63
|
+
import numpy as np
|
|
54
64
|
import json
|
|
55
65
|
from sarapy.preprocessing import TransformInputData
|
|
66
|
+
import matplotlib.pyplot as plt
|
|
67
|
+
from collections import Counter
|
|
68
|
+
|
|
69
|
+
fecha = "2025-08-09"
|
|
70
|
+
nodo_interes = "UPM095N"
|
|
56
71
|
|
|
57
|
-
historical_data_path = "examples
|
|
72
|
+
historical_data_path = f"examples//{fecha}//{nodo_interes}//historical-data.json"
|
|
58
73
|
with open(historical_data_path, 'r') as file:
|
|
59
74
|
historical_data = json.load(file)
|
|
60
75
|
|
|
61
76
|
##cargo en un diccionario sarapy\preprocessing\telemetriaDataPosition.json
|
|
62
77
|
data_positions = json.load(open("sarapy/preprocessing/telemetriaDataPosition.json", 'r'))
|
|
63
|
-
transform_input_data = TransformInputData
|
|
78
|
+
transform_input_data = TransformInputData()
|
|
64
79
|
transformed_data = transform_input_data.transform(historical_data)
|
|
65
80
|
|
|
66
|
-
fertransformer = FertilizerTransformer(regresor_file='modelos\\
|
|
81
|
+
fertransformer = FertilizerTransformer(regresor_file='modelos\\regresor_v2.pkl', poly_features_file='modelos\\poly_features_v2.pkl')
|
|
67
82
|
gramos = fertransformer.transform(transformed_data)
|
|
68
|
-
print(gramos[:10])
|
|
83
|
+
print(gramos[:10])
|
|
84
|
+
|
|
85
|
+
df = pd.DataFrame(transformed_data)
|
|
86
|
+
score_ft = df["SC_FT"].values
|
|
87
|
+
|
|
88
|
+
print(score_ft.mean(), gramos.mean())
|
|
89
|
+
print(score_ft.max(), gramos.max())
|
|
90
|
+
print(score_ft.min(), gramos.min())
|
|
91
|
+
|
|
92
|
+
puntos = list(zip(score_ft, gramos))
|
|
93
|
+
conteos = Counter(puntos)
|
|
94
|
+
xs, ys, sizes = zip(*[(x, y, c) for (x, y), c in conteos.items()])
|
|
95
|
+
|
|
96
|
+
np.array([s*10 for s in sizes]).shape
|
|
97
|
+
|
|
98
|
+
points = np.column_stack((score_ft, gramos))
|
|
99
|
+
unique_points, counts = np.unique(points, axis=0, return_counts=True)
|
|
100
|
+
|
|
101
|
+
sizes = np.log1p(counts) * 50
|
|
102
|
+
|
|
103
|
+
plt.figure(figsize=(10, 6))
|
|
104
|
+
handles, labels = plt.gca().get_legend_handles_labels()
|
|
105
|
+
order = [2, 0, 1]
|
|
106
|
+
plt.scatter(unique_points[:,0], unique_points[:,1], color="#5612af", label="Regresor 1 - Orden 12",zorder=1,
|
|
107
|
+
s=sizes)
|
|
108
|
+
plt.scatter(score_ft.mean(), gramos.mean(), color="#af121f", label="Punto promedio", marker='X',s=400)
|
|
109
|
+
plt.title(f'Predicciones Regresor 2 de orden 12 para NODO: {nodo_interes}')
|
|
110
|
+
plt.xlabel('Score de Fertilizante (SC_FT)')
|
|
111
|
+
plt.ylabel('Predicciones de Gramos de Fertilizante')
|
|
112
|
+
plt.grid(True)
|
|
113
|
+
plt.legend()
|
|
114
|
+
plt.savefig(f'predicciones_regresor2_orden12_{nodo_interes}.png')
|
|
115
|
+
plt.show()
|
|
116
|
+
|
|
117
|
+
nodos = ["UPM075N", "UPM076N", "UPM077N", "UPM078N", "UPM079N", "UPM080N", "UPM081N", "UPM082N", "UPM083N", "UPM084N",
|
|
118
|
+
"UPM085N", "UPM086N", "UPM087N", "UPM088N", "UPM089N", "UPM090N", "UPM091N", "UPM092N", "UPM093N", "UPM094N", "UPM095N",
|
|
119
|
+
"UPM096N", "UPM097N", "UPM098N", "UPM099N"]
|
|
120
|
+
|
|
121
|
+
##cargo datos históricos de ejemplo
|
|
122
|
+
|
|
123
|
+
scores_ft_maximos = {}
|
|
124
|
+
scores_ft_minimos = {}
|
|
125
|
+
gramos_maximos = {}
|
|
126
|
+
gramos_minimos = {}
|
|
127
|
+
for nodo in nodos:
|
|
128
|
+
historical_data_path = f"examples//{fecha}//{nodo}//historical-data.json"
|
|
129
|
+
try:
|
|
130
|
+
with open(historical_data_path, 'r') as file:
|
|
131
|
+
historical_data = json.load(file)
|
|
132
|
+
except FileNotFoundError:
|
|
133
|
+
print(f"El archivo {historical_data_path} no se encuentra en el directorio actual.")
|
|
134
|
+
continue
|
|
135
|
+
transform_input_data = TransformInputData()
|
|
136
|
+
transformed_data = transform_input_data.transform(historical_data)
|
|
137
|
+
fertransformer = FertilizerTransformer(regresor_file='modelos\\regresor_v2.pkl', poly_features_file='modelos\\poly_features_v2.pkl')
|
|
138
|
+
gramos = fertransformer.transform(transformed_data)
|
|
139
|
+
gramos_maximos[nodo] = gramos.max()
|
|
140
|
+
gramos_minimos[nodo] = gramos.min()
|
|
141
|
+
|
|
142
|
+
df = pd.DataFrame(transformed_data)
|
|
143
|
+
score_ft = df["SC_FT"].values
|
|
144
|
+
scores_ft_maximos[nodo] = score_ft.max()
|
|
145
|
+
scores_ft_minimos[nodo] = score_ft.min()
|
|
146
|
+
|
|
147
|
+
data = np.array([[gramos_maximos[nodo] for nodo in nodos],
|
|
148
|
+
[scores_ft_maximos[nodo] for nodo in nodos],
|
|
149
|
+
[gramos_minimos[nodo] for nodo in nodos],
|
|
150
|
+
[scores_ft_minimos[nodo] for nodo in nodos]])
|
|
151
|
+
|
|
152
|
+
data_df = pd.DataFrame(data=data.T, index=nodos, columns=['Gramos_Fertilizante', 'Score_Fertilizante', 'Gramos_Fertilizante_Min', 'Score_Fertilizante_Min'])
|
|
153
|
+
|
|
154
|
+
data_df['Gramos_Fertilizante'].plot.bar(figsize=(12, 6), color="#34a853", legend=False)
|
|
155
|
+
#add text labels on top of each bar with the height value
|
|
156
|
+
for i, v in enumerate(data_df['Gramos_Fertilizante']):
|
|
157
|
+
plt.text(i, v + 0.1, f"{v:.1f}", ha='center', va='bottom',color="#34a853")
|
|
158
|
+
plt.title('Máximos de gramos de fertilizante por nodo')
|
|
159
|
+
plt.xlabel('Nodos')
|
|
160
|
+
plt.ylabel('Gramos de Fertilizante')
|
|
161
|
+
plt.grid(axis='y')
|
|
162
|
+
plt.savefig('maximos_gramos_fertilizante_por_nodo.png')
|
|
163
|
+
plt.show()
|
|
164
|
+
|
|
165
|
+
data_df['Gramos_Fertilizante_Min'].plot.bar(figsize=(12, 6), color="#34a853", legend=False)
|
|
166
|
+
#add text labels on top of each bar with the height value
|
|
167
|
+
for i, v in enumerate(data_df['Gramos_Fertilizante_Min']):
|
|
168
|
+
plt.text(i, v + 0.1, f"{v:.1f}", ha='center', va='bottom',color="#34a853")
|
|
169
|
+
plt.title('Mínimos de gramos de fertilizante por nodo')
|
|
170
|
+
plt.xlabel('Nodos')
|
|
171
|
+
plt.ylabel('Gramos de Fertilizante')
|
|
172
|
+
plt.grid(axis='y')
|
|
173
|
+
plt.savefig('minimos_gramos_fertilizante_por_nodo.png')
|
|
174
|
+
plt.show()
|
|
175
|
+
|
|
176
|
+
data_df['Score_Fertilizante'].plot.bar(figsize=(12, 6), color="#3434a8", legend=False)
|
|
177
|
+
#add text labels on top of each bar with the height value
|
|
178
|
+
for i, v in enumerate(data_df['Score_Fertilizante']):
|
|
179
|
+
plt.text(i, v + 0.1, f"{v:.1f}", ha='center', va='bottom',color="#3434a8")
|
|
180
|
+
plt.title('Máximos de score de fertilizante por nodo')
|
|
181
|
+
plt.xlabel('Nodos')
|
|
182
|
+
plt.ylabel('Score de Fertilizante')
|
|
183
|
+
plt.grid(axis='y')
|
|
184
|
+
plt.savefig('maximos_score_fertilizante_por_nodo.png')
|
|
185
|
+
plt.show()
|
|
186
|
+
|
|
187
|
+
data_df['Score_Fertilizante_Min'].plot.bar(figsize=(12, 6), color="#3434a8", legend=False)
|
|
188
|
+
#add text labels on top of each bar with the height value
|
|
189
|
+
for i, v in enumerate(data_df['Score_Fertilizante_Min']):
|
|
190
|
+
plt.text(i, v + 0.1, f"{v:.1f}", ha='center', va='bottom',color="#3434a8")
|
|
191
|
+
plt.title('Mínimos de score de fertilizante por nodo')
|
|
192
|
+
plt.xlabel('Nodos')
|
|
193
|
+
plt.ylabel('Score de Fertilizante')
|
|
194
|
+
plt.grid(axis='y')
|
|
195
|
+
plt.savefig('minimos_score_fertilizante_por_nodo.png')
|
|
196
|
+
plt.show()
|
|
197
|
+
|
|
198
|
+
|
|
@@ -28,11 +28,13 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
28
28
|
self.logger.info("Clasificador cargado con éxito.")
|
|
29
29
|
except FileNotFoundError:
|
|
30
30
|
self.logger.error("El archivo no se encuentra en el directorio actual.")
|
|
31
|
+
raise
|
|
31
32
|
|
|
32
|
-
def classify(self, feature_matrix, dst_pt, inest_pt,
|
|
33
|
+
def classify(self, feature_matrix, dst_pt, inest_pt,
|
|
34
|
+
proba_threshold = 0.45, use_proba_ma = False, proba_ma_window = 10,
|
|
33
35
|
update_samePlace:bool = True, update_dstpt: bool = True,
|
|
34
|
-
umbral_proba_dstpt = 0.
|
|
35
|
-
use_ma =
|
|
36
|
+
umbral_proba_dstpt = 0.5, umbral_bajo_dstpt = 1.5,
|
|
37
|
+
use_ma = True, dstpt_ma_window = 62,
|
|
36
38
|
use_min_dstpt = False, factor = 0.1, **kwargs):
|
|
37
39
|
"""Genera la clasificación de las operaciones para plantines.
|
|
38
40
|
|
|
@@ -50,13 +52,25 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
50
52
|
"""
|
|
51
53
|
|
|
52
54
|
if use_ma:
|
|
53
|
-
dst_pt
|
|
55
|
+
if dst_pt.shape[0] < dstpt_ma_window:
|
|
56
|
+
self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil.")
|
|
57
|
+
dst_pt = self.get_dstpt_MA(dst_pt, window_size=dst_pt.shape[0], mode='same')
|
|
58
|
+
else:
|
|
59
|
+
dst_pt = self.get_dstpt_MA(dst_pt, window_size=dstpt_ma_window, mode='same')
|
|
54
60
|
|
|
55
61
|
self.clasificaciones = self._pipeline.predict(feature_matrix)
|
|
56
62
|
self.classifications_probas = self._pipeline.predict_proba(feature_matrix)
|
|
57
63
|
|
|
58
|
-
|
|
59
|
-
|
|
64
|
+
if use_proba_ma:
|
|
65
|
+
if proba_ma_window >= self.classifications_probas.shape[0]:
|
|
66
|
+
self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil a las probabilidades.")
|
|
67
|
+
probas_ma = self.get_probas_MA(self.classifications_probas, window_size=self.classifications_probas.shape[0], mode='same')
|
|
68
|
+
else:
|
|
69
|
+
probas_ma = self.get_probas_MA(self.classifications_probas, window_size=proba_ma_window, mode='same')
|
|
70
|
+
self.clasificaciones[probas_ma[:,1] < proba_threshold] = 0
|
|
71
|
+
else:
|
|
72
|
+
# self.clasificaciones = self._pipeline.classes_[np.argmax(self.classifications_probas, axis=1)]
|
|
73
|
+
self.clasificaciones[self.classifications_probas[:,1] < proba_threshold] = 0
|
|
60
74
|
|
|
61
75
|
if update_samePlace:
|
|
62
76
|
self.grouped_ops = self.groupOpsSamePlace(feature_matrix, **kwargs)
|
|
@@ -69,7 +83,7 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
69
83
|
|
|
70
84
|
return self.clasificaciones, self.classifications_probas
|
|
71
85
|
|
|
72
|
-
def groupOpsSamePlace(self, X, useRatioStats =
|
|
86
|
+
def groupOpsSamePlace(self, X, useRatioStats = False, std_weight=1, useDistancesStats = False,
|
|
73
87
|
ratio_dcdp_umbral=0.1, dist_umbral=0.5):
|
|
74
88
|
"""
|
|
75
89
|
Función que agrupa las operaciones que se realizaron en el mismo lugar o que sean de limpieza.
|
|
@@ -141,7 +155,7 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
141
155
|
return new_labels
|
|
142
156
|
|
|
143
157
|
def updateLabelsFromDSTPT(self, labels, dst_pt, inest_pt,
|
|
144
|
-
umbral_bajo_dstpt = 4, umbral_proba_dstpt = 0.
|
|
158
|
+
umbral_bajo_dstpt = 4, umbral_proba_dstpt = 0.5,
|
|
145
159
|
use_min_dstpt = False, factor = 0.1):
|
|
146
160
|
"""
|
|
147
161
|
Función para actualizar las etiquetas de las operaciones que tengan distorsiones de plantín.
|
|
@@ -164,7 +178,25 @@ class PlantinClassifier(BaseEstimator, TransformerMixin):
|
|
|
164
178
|
dst_pt: numpy array con los datos de la serie temporal
|
|
165
179
|
window_size: tamaño de la ventana para calcular la media móvil
|
|
166
180
|
"""
|
|
167
|
-
return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
|
|
181
|
+
# return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
|
|
182
|
+
padding_start = dst_pt[0:window_size]
|
|
183
|
+
padding_end = dst_pt[-window_size:]
|
|
184
|
+
padded_data = np.concatenate([padding_start, dst_pt, padding_end])
|
|
185
|
+
ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
|
|
186
|
+
return ma_full[window_size: -window_size]
|
|
187
|
+
|
|
188
|
+
def get_probas_MA(self, probas, window_size=104, mode='same'):
|
|
189
|
+
"""
|
|
190
|
+
Función para calcular la media móvil de una serie temporal.
|
|
191
|
+
probas: numpy array 2D con las probabilidades de cada clase (una fila por muestra); la media móvil se calcula por columna
|
|
192
|
+
window_size: tamaño de la ventana para calcular la media móvil
|
|
193
|
+
"""
|
|
194
|
+
# return np.convolve(dst_pt, np.ones(window_size)/window_size, mode=mode)
|
|
195
|
+
padding_start = probas[0:window_size, :]
|
|
196
|
+
padding_end = probas[-window_size:, :]
|
|
197
|
+
padded_data = np.vstack([padding_start, probas, padding_end])
|
|
198
|
+
ma_full = np.apply_along_axis(lambda m: np.convolve(m, np.ones(window_size)/window_size, mode='same'), axis=0, arr=padded_data)
|
|
199
|
+
return ma_full[window_size: -window_size, :]
|
|
168
200
|
|
|
169
201
|
if __name__ == "__main__":
|
|
170
202
|
import os
|
|
@@ -176,22 +208,35 @@ if __name__ == "__main__":
|
|
|
176
208
|
import json
|
|
177
209
|
|
|
178
210
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
211
|
+
## argumentos de PlantinFMCreator
|
|
212
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
213
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
214
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
215
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
216
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
##argumentos del método PlantinClassifier.classify()
|
|
220
|
+
kwargs_classifier = {"proba_threshold":0.45,
|
|
221
|
+
"use_proba_ma":False,
|
|
222
|
+
"proba_ma_window":10,
|
|
223
|
+
"update_samePlace":True,
|
|
224
|
+
"update_dstpt":True,
|
|
225
|
+
"umbral_proba_dstpt":0.5,
|
|
226
|
+
"umbral_bajo_dstpt":1.5,
|
|
227
|
+
"use_ma":True,
|
|
228
|
+
"dstpt_ma_window":62,
|
|
229
|
+
"use_min_dstpt":False,
|
|
230
|
+
"factor":0.1,
|
|
231
|
+
|
|
186
232
|
"useRatioStats":False,
|
|
187
233
|
"std_weight":1.,
|
|
188
234
|
"useDistancesStats":False,
|
|
189
|
-
"ratio_dcdp_umbral":0.
|
|
235
|
+
"ratio_dcdp_umbral":0.1,
|
|
190
236
|
"dist_umbral":0.5,
|
|
191
|
-
|
|
192
|
-
"umbral_proba_dstpt":0.85}
|
|
237
|
+
}
|
|
193
238
|
|
|
194
|
-
historical_data_path = "examples\\2025-09-04\\
|
|
239
|
+
historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
|
|
195
240
|
with open(historical_data_path, 'r') as file:
|
|
196
241
|
samples = json.load(file)
|
|
197
242
|
|
|
@@ -210,6 +255,3 @@ if __name__ == "__main__":
|
|
|
210
255
|
print("primeras probabilidades", probas[100:105])
|
|
211
256
|
print("primeras distorsiones", dst_pt[100:105])
|
|
212
257
|
print("primeras inestabilidades", inest_pt[100:105])
|
|
213
|
-
|
|
214
|
-
# print(rf_clf_wu.classify(X, dst_pt, inest_pt, **kwargs_classifier))
|
|
215
|
-
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
###Documentación en https://github.com/lucasbaldezzari/sarapy/blob/main/docs/Docs.md
|
|
2
|
+
import logging
|
|
2
3
|
import numpy as np
|
|
3
4
|
from sklearn.base import BaseEstimator, TransformerMixin
|
|
4
5
|
from sarapy.dataProcessing import TLMSensorDataProcessor, TimeSeriesProcessor, GeoProcessor
|
|
@@ -20,7 +21,9 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
20
21
|
|
|
21
22
|
def __init__(self, imputeDistances = True, distanciaMedia:float = 1.8,
|
|
22
23
|
umbral_precision:float = 0.3, dist_mismo_lugar = 0.0, max_dist = 100,
|
|
23
|
-
umbral_ratio_dCdP:float = 0.5, deltaO_medio = 4, baseDeltaP = 10
|
|
24
|
+
umbral_ratio_dCdP:float = 0.5, deltaO_medio = 4, baseDeltaP = 10,
|
|
25
|
+
impute_ratiodcdp = False, umbral_impute_ratiodcdp = -0.8,
|
|
26
|
+
deltaO_ma = False, deltaO_ma_window = 26):
|
|
24
27
|
"""Inicializa la clase FMCreator.
|
|
25
28
|
|
|
26
29
|
Args:
|
|
@@ -30,6 +33,7 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
30
33
|
- umbral_ratio_dCdP: Umbral para el ratio entre el delta de caminata y el delta de pico abierto.
|
|
31
34
|
- deltaO_medio: delta de operación medio entre operaciones.
|
|
32
35
|
"""
|
|
36
|
+
self.logger = logging.getLogger("PlantinFMCreator")
|
|
33
37
|
|
|
34
38
|
self.is_fitted = False
|
|
35
39
|
self.imputeDistances = imputeDistances
|
|
@@ -40,9 +44,10 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
40
44
|
self.umbral_ratio_dCdP = umbral_ratio_dCdP
|
|
41
45
|
self.deltaO_medio = deltaO_medio
|
|
42
46
|
self.baseDeltaP = baseDeltaP
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
self.
|
|
47
|
+
self.impute_ratiodcdp = impute_ratiodcdp
|
|
48
|
+
self.umbral_impute_ratiodcdp = umbral_impute_ratiodcdp
|
|
49
|
+
self.deltaO_ma = deltaO_ma
|
|
50
|
+
self.deltaO_ma_window = deltaO_ma_window
|
|
46
51
|
|
|
47
52
|
def fit(self, X: np.array, y=None)-> np.array:
|
|
48
53
|
"""Fittea el objeto
|
|
@@ -73,7 +78,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
73
78
|
tpDP = timeProcessor._dataPositions
|
|
74
79
|
geoprocessor = GeoProcessor.GeoProcessor()
|
|
75
80
|
|
|
76
|
-
|
|
77
81
|
date_oprc = self.tlmDataProcessor["date_oprc",:] #datos de fecha y hora de operación
|
|
78
82
|
time_ac = self.tlmDataProcessor["TIME_AC",:]/self.baseDeltaP #datos de fecha y hora de operación en formato timestamp
|
|
79
83
|
lats = self.tlmDataProcessor["latitud",:] #latitudes de las operaciones
|
|
@@ -88,7 +92,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
88
92
|
timeData = np.hstack((date_oprc.reshape(-1,1),time_ac.reshape(-1, 1)))
|
|
89
93
|
|
|
90
94
|
self._timeDeltas = timeProcessor.fit_transform(timeData)
|
|
91
|
-
# print(np.median(self._timeDeltas[:,tpDP["ratio_dCdP"]]))
|
|
92
95
|
|
|
93
96
|
##fitteamos geoprocessor con las latitudes y longitudes
|
|
94
97
|
points = np.hstack((lats.reshape(-1,1),longs.reshape(-1,1)))
|
|
@@ -98,6 +101,22 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
98
101
|
self.featureMatrix = np.vstack((self._timeDeltas[:,tpDP["deltaO"]],
|
|
99
102
|
self._timeDeltas[:,tpDP["ratio_dCdP"]],
|
|
100
103
|
self._distances)).T
|
|
104
|
+
|
|
105
|
+
if self.impute_ratiodcdp:
|
|
106
|
+
ratio_dcdp_median = np.median(self.featureMatrix[:, 1])
|
|
107
|
+
self.featureMatrix[:, 1] = np.where(self.featureMatrix[:, 1] < self.umbral_impute_ratiodcdp, ratio_dcdp_median, self.featureMatrix[:, 1])
|
|
108
|
+
|
|
109
|
+
if self.deltaO_ma:
|
|
110
|
+
data = self.featureMatrix[:, 0]
|
|
111
|
+
if self.deltaO_ma_window >= len(data):
|
|
112
|
+
self.logger.warning("El tamaño de la serie temporal es menor que la ventana de media móvil. No se aplicará media móvil a deltaO.")
|
|
113
|
+
self.deltaO_ma_window = len(data)
|
|
114
|
+
|
|
115
|
+
padding_start = data[0:self.deltaO_ma_window]
|
|
116
|
+
padding_end = data[-self.deltaO_ma_window:]
|
|
117
|
+
padded_data = np.concatenate([padding_start, data, padding_end])
|
|
118
|
+
ma_full = np.convolve(padded_data, np.ones(self.deltaO_ma_window)/self.deltaO_ma_window, mode='same')
|
|
119
|
+
self.featureMatrix[:, 0] = ma_full[self.deltaO_ma_window: - self.deltaO_ma_window]
|
|
101
120
|
|
|
102
121
|
return self.featureMatrix, self.dst_pt, self.inest_pt
|
|
103
122
|
|
|
@@ -130,12 +149,6 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
|
|
|
130
149
|
"""Devuelve las distancias entre operaciones."""
|
|
131
150
|
return self._distances
|
|
132
151
|
|
|
133
|
-
@property
|
|
134
|
-
def dataPositions(self):
|
|
135
|
-
"""Devuelve el diccionario con la posición de los datos dentro del array devuelto por transform()."""
|
|
136
|
-
return self._dataPositions
|
|
137
|
-
|
|
138
|
-
|
|
139
152
|
if __name__ == "__main__":
|
|
140
153
|
import pandas as pd
|
|
141
154
|
import json
|
|
@@ -137,7 +137,7 @@ if __name__ == "__main__":
|
|
|
137
137
|
import pandas as pd
|
|
138
138
|
import json
|
|
139
139
|
|
|
140
|
-
historical_data_path = "examples\\2025-
|
|
140
|
+
historical_data_path = "examples\\2025-09-04\\UPM042N\\historical-data.json"
|
|
141
141
|
with open(historical_data_path, 'r') as file:
|
|
142
142
|
historical_data = json.load(file)
|
|
143
143
|
df = pd.DataFrame(historical_data)
|
|
@@ -146,4 +146,5 @@ if __name__ == "__main__":
|
|
|
146
146
|
data_positions = json.load(open("sarapy/preprocessing/telemetriaDataPosition.json", 'r'))
|
|
147
147
|
transform_input_data = TransformInputData()
|
|
148
148
|
transformed_data = transform_input_data.transform(historical_data)
|
|
149
|
-
print(transformed_data[:2])
|
|
149
|
+
print(transformed_data[:2])
|
|
150
|
+
print(transformed_data[0]["date_oprc"])
|
sarapy/version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
## Version of the package
|
|
2
|
-
__version__ = "
|
|
2
|
+
__version__ = "3.1.0"
|
|
@@ -1,24 +1,76 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: sarapy
|
|
3
|
-
Version:
|
|
4
|
-
|
|
5
|
-
Author: Lucas Baldezzari
|
|
3
|
+
Version: 3.1.0
|
|
4
|
+
Summary: Library for Sarapico Metadata processing
|
|
6
5
|
Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 10
|
|
10
|
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 11
|
|
11
|
+
Classifier: Operating System :: Unix
|
|
12
|
+
Requires-Python: >=3.9
|
|
9
13
|
Description-Content-Type: text/markdown
|
|
10
14
|
License-File: LICENCE
|
|
11
|
-
Requires-Dist: numpy
|
|
12
|
-
Requires-Dist:
|
|
13
|
-
Requires-Dist:
|
|
14
|
-
Requires-Dist:
|
|
15
|
-
Requires-Dist:
|
|
16
|
-
Requires-Dist:
|
|
15
|
+
Requires-Dist: numpy>=1.23
|
|
16
|
+
Requires-Dist: pandas>=1.5
|
|
17
|
+
Requires-Dist: scipy>=1.9
|
|
18
|
+
Requires-Dist: scikit-learn>=1.2
|
|
19
|
+
Requires-Dist: matplotlib>=3.6
|
|
20
|
+
Requires-Dist: seaborn>=0.12
|
|
21
|
+
Requires-Dist: requests>=2.28
|
|
22
|
+
Requires-Dist: python-dotenv>=1.0
|
|
23
|
+
Requires-Dist: geopy>=2.3
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest; extra == "dev"
|
|
26
|
+
Requires-Dist: black; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff; extra == "dev"
|
|
28
|
+
Requires-Dist: mypy; extra == "dev"
|
|
29
|
+
Dynamic: license-file
|
|
17
30
|
|
|
18
31
|
# SARAPY
|
|
19
32
|
|
|
20
33
|
Library for processing SARAPICO project metadata of _AMG SA_.
|
|
21
34
|
|
|
35
|
+
#### Version 3.1.0
|
|
36
|
+
|
|
37
|
+
- Se actualiza regresor para estimar fertilizante.
|
|
38
|
+
- Actualización de archivos para instalar la librería.
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
#### Version 3.0.0
|
|
42
|
+
- Se mejora la forma de obtener valores de media móvil para todas las variables en las que se usa.
|
|
43
|
+
- Se corrigen bugs debido a nodos con pocas operaciones.
|
|
44
|
+
- Se corrigen errores a la hora de pasar parámetros a los métodos de algunas clases.
|
|
45
|
+
- Se configuran parámetros de fmcreator y plantin_classifier para el reetiquetado, los mismos son:
|
|
46
|
+
|
|
47
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
48
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
49
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
50
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
51
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
##argumentos del método PlantinClassifier.classify()
|
|
55
|
+
kwargs_classifier = {"proba_threshold":0.4,
|
|
56
|
+
"use_proba_ma":False,
|
|
57
|
+
"proba_ma_window":10,
|
|
58
|
+
"update_samePlace":True,
|
|
59
|
+
"update_dstpt":True,
|
|
60
|
+
"umbral_proba_dstpt":0.5,
|
|
61
|
+
"umbral_bajo_dstpt":1.5,
|
|
62
|
+
"use_ma":True,
|
|
63
|
+
"dstpt_ma_window":62,
|
|
64
|
+
"use_min_dstpt":False,
|
|
65
|
+
"factor":0.1,
|
|
66
|
+
|
|
67
|
+
"useRatioStats":False,
|
|
68
|
+
"std_weight":1.,
|
|
69
|
+
"useDistancesStats":False,
|
|
70
|
+
"ratio_dcdp_umbral":0.1,
|
|
71
|
+
"dist_umbral":0.5,
|
|
72
|
+
}
|
|
73
|
+
|
|
22
74
|
#### Version 2.3.0
|
|
23
75
|
|
|
24
76
|
- Se agregan funcionalidades.
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
sarapy/__init__.py,sha256=aVoywqGSscYYDycLaYJnz08dlQabl9gH0h4Q5KtHM9o,74
|
|
2
|
-
sarapy/version.py,sha256=
|
|
3
|
-
sarapy/analysis/FeaturesResume.py,sha256=
|
|
2
|
+
sarapy/version.py,sha256=cXLYU3050O7pgJLDnRyZy-RRSpT1wyHNfQXLgIuYsaU,48
|
|
3
|
+
sarapy/analysis/FeaturesResume.py,sha256=fqKpDy7Py3QHUMtrS8r-KE25ah4HjkJxBKoZtHdORAQ,31946
|
|
4
4
|
sarapy/analysis/__init__.py,sha256=i6QGXmnuA-k6Gh6639TinluogMhLGIiL-tiR_S2i2Ok,74
|
|
5
5
|
sarapy/dataProcessing/GeoProcessor.py,sha256=ARjgKTXDVdf_cFCXyFmzlnmmmay3HG3q-yeJ9QrAcQU,5919
|
|
6
|
-
sarapy/dataProcessing/OpsProcessor.py,sha256=
|
|
7
|
-
sarapy/dataProcessing/TLMSensorDataProcessor.py,sha256=
|
|
6
|
+
sarapy/dataProcessing/OpsProcessor.py,sha256=HT9TvldVwoLKTLjtmtwA78KetoxUjuzw5Lce2WqM6CU,19432
|
|
7
|
+
sarapy/dataProcessing/TLMSensorDataProcessor.py,sha256=NhRxMoA4SHwyhD61xn6m5UIp1ZrDhEnHaFfhveMJLRQ,3689
|
|
8
8
|
sarapy/dataProcessing/TimeSeriesProcessor.py,sha256=aig3A3_SCa9FVSWxGWiapBUX7Lj9Wi1BVyZi-XXZZYQ,6414
|
|
9
9
|
sarapy/dataProcessing/__init__.py,sha256=Kqs5sFtq6RMEa3KLJFbsGRoYsIxHL1UUGMuplyCyQFk,200
|
|
10
10
|
sarapy/mlProcessors/FertilizerFMCreator.py,sha256=LNi86CI6eVuQ0_UBVJNd_-L79fcY2-zY2NCm9ypl6OM,2354
|
|
11
|
-
sarapy/mlProcessors/FertilizerTransformer.py,sha256=
|
|
12
|
-
sarapy/mlProcessors/PlantinClassifier.py,sha256=
|
|
13
|
-
sarapy/mlProcessors/PlantinFMCreator.py,sha256=
|
|
11
|
+
sarapy/mlProcessors/FertilizerTransformer.py,sha256=MTsuplwuRdDMVzycRRYZa98ZOEgRhBcjaDWQg6kyph4,8933
|
|
12
|
+
sarapy/mlProcessors/PlantinClassifier.py,sha256=yNck3R8wGfy6rjb8Q2mxVdu63NWJgJ6UmqUORa2qvbk,12491
|
|
13
|
+
sarapy/mlProcessors/PlantinFMCreator.py,sha256=y8rdkUb-84-ONa4kJOY2R2zAfuOXtUJVBEhUPhDncyY,7852
|
|
14
14
|
sarapy/mlProcessors/__init__.py,sha256=wHnqLn15KRCOYI9WWS8_ArraG_c4UEfDCi19muwjN14,335
|
|
15
15
|
sarapy/preprocessing/DistancesImputer.py,sha256=NvbVAh5m0yFxVgDbEFnEX7RSG13qLjO7i2gqjDAWsf4,9106
|
|
16
16
|
sarapy/preprocessing/FertilizerImputer.py,sha256=zK6ONAilwPHvj-bC7yxnQYOkDBCCkWh6__57vYK9anM,1490
|
|
17
|
-
sarapy/preprocessing/TransformInputData.py,sha256=
|
|
17
|
+
sarapy/preprocessing/TransformInputData.py,sha256=gT0S_ANSmSODPru4DVK7qpA7ZqnRoPwNyLkV-VJWvAU,8584
|
|
18
18
|
sarapy/preprocessing/TransformToOutputData.py,sha256=2hSeFkrSt1OO_jiX4SQJtL3Dhm_9xLy7zCgkj8jo9OE,3137
|
|
19
19
|
sarapy/preprocessing/__init__.py,sha256=2if1rcq8WCk8u4M3bHcE_tY2hLmZxwNG4qdLNJR1Ixg,331
|
|
20
20
|
sarapy/stats/__init__.py,sha256=X4IZsG2TxZUtXYmONvVJymHInnLHMqiThmW6U2ZMd8U,258
|
|
@@ -22,8 +22,11 @@ sarapy/stats/stats.py,sha256=eVmi6w9QcwvwuDK3yOr1Z8wQV-1oT3QJujDqWZFYzGc,11424
|
|
|
22
22
|
sarapy/utils/__init__.py,sha256=TD_-dGgPQBD13hyf2OqDUET0XZOXTduJD1ht8tjZF_0,257
|
|
23
23
|
sarapy/utils/plotting.py,sha256=kX-eYw618urMcUBkNPviQZdBziDc_TR3GInTsO90kU4,4065
|
|
24
24
|
sarapy/utils/utils.py,sha256=NSSeZHeLnQWcFa6vfJ2nVkptX2dIyiCMlZPBmsgEvjo,7106
|
|
25
|
-
sarapy-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
sarapy-
|
|
25
|
+
sarapy-3.1.0.dist-info/licenses/LICENCE,sha256=N00sU3vSQ6F5c2vML9_qP4IFTkCPFFj0YGDB2CZP-uQ,840
|
|
26
|
+
test/checking_regresor.py,sha256=F9JwCHdg18dC_flNold_Jc9Gkuib9RDo5jeB-xgReqo,5932
|
|
27
|
+
test/probabilidades_test.py,sha256=4mGwuYNQGQd-LurL63pBpaaw5QHIN1PymBPhzRQB9Hc,3254
|
|
28
|
+
test/test_import.py,sha256=qYBVkwb3ACzGyYj4cVBNmrPAWw-cuITHgWP4oJYdrto,56
|
|
29
|
+
sarapy-3.1.0.dist-info/METADATA,sha256=KvC1CO3uzNDPh--GUX0-3wippO0ThjttyoUGVk8m2Tg,8278
|
|
30
|
+
sarapy-3.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
31
|
+
sarapy-3.1.0.dist-info/top_level.txt,sha256=gsDJg1lRhlnLTvKqH039RW-fsHlHgX6ZTxaM6GheziQ,34
|
|
32
|
+
sarapy-3.1.0.dist-info/RECORD,,
|