sarapy 1.0.1__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sarapy-1.0.1/sarapy.egg-info → sarapy-1.1.1}/PKG-INFO +12 -1
- {sarapy-1.0.1 → sarapy-1.1.1}/README.md +11 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/pyproject.toml +1 -1
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/dataProcessing/OpsProcessor.py +47 -15
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/dataProcessing/TimeSeriesProcessor.py +8 -10
- sarapy-1.1.1/sarapy/mlProcessors/PlantinClassifier.py +145 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/mlProcessors/PlantinFMCreator.py +32 -48
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/preprocessing/TransformInputData.py +5 -1
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/preprocessing/TransformToOutputData.py +35 -4
- sarapy-1.1.1/sarapy/utils/getRawOperations.py +20 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/version.py +1 -1
- {sarapy-1.0.1 → sarapy-1.1.1/sarapy.egg-info}/PKG-INFO +12 -1
- sarapy-1.0.1/sarapy/mlProcessors/PlantinClassifier.py +0 -71
- sarapy-1.0.1/sarapy/utils/getRawOperations.py +0 -25
- {sarapy-1.0.1 → sarapy-1.1.1}/LICENCE +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/__init__.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/dataProcessing/GeoProcessor.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/dataProcessing/TLMSensorDataProcessor.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/dataProcessing/__init__.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/mlProcessors/__init__.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/preprocessing/DistancesImputer.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/preprocessing/FertilizerImputer.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/preprocessing/__init__.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/utils/__init__.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/utils/amg_decoder.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy/utils/amg_ppk.py +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy.egg-info/SOURCES.txt +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy.egg-info/dependency_links.txt +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy.egg-info/requires.txt +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/sarapy.egg-info/top_level.txt +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/setup.cfg +0 -0
- {sarapy-1.0.1 → sarapy-1.1.1}/setup.py +0 -0
--- sarapy-1.0.1/sarapy.egg-info/PKG-INFO
+++ sarapy-1.1.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sarapy
-Version: 1.0.1
+Version: 1.1.1
 Home-page: https://github.com/lucasbaldezzari/sarapy
 Author: Lucas Baldezzari
 Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
@@ -19,6 +19,17 @@ Requires-Dist: geopy
 
 Library for processing SARAPICO project metadata of _AMG_.
 
+#### Version 1.1.1
+
+- TimeSeriesProcessor.compute_ratio_dCdP() is reworked, since the np.vectorize-based version apparently did not work correctly in certain cases.
+
+#### Version 1.1.0
+
+Stable 1.1 version for use on the server.
+
+- A new strategy for classifying seedlings is implemented.
+
+
 #### Version 1.0.1
 
 - *__init__.py* is added inside the _utils_ module.
--- sarapy-1.0.1/README.md
+++ sarapy-1.1.1/README.md
@@ -2,6 +2,17 @@
 
 Library for processing SARAPICO project metadata of _AMG_.
 
+#### Version 1.1.1
+
+- TimeSeriesProcessor.compute_ratio_dCdP() is reworked, since the np.vectorize-based version apparently did not work correctly in certain cases.
+
+#### Version 1.1.0
+
+Stable 1.1 version for use on the server.
+
+- A new strategy for classifying seedlings is implemented.
+
+
 #### Version 1.0.1
 
 - *__init__.py* is added inside the _utils_ module.
--- sarapy-1.0.1/sarapy/dataProcessing/OpsProcessor.py
+++ sarapy-1.1.1/sarapy/dataProcessing/OpsProcessor.py
@@ -4,7 +4,7 @@ import datetime
 from dateutil.tz import tzutc
 import numpy as np
 import pandas as pd
-
+from sarapy.mlProcessors import PlantinFMCreator
 from sarapy.mlProcessors import PlantinClassifier
 from sarapy.preprocessing import TransformInputData, TransformToOutputData
 
@@ -22,20 +22,27 @@ class OpsProcessor():
         """Constructor of the OpsProcessor class.
 
         Args:
-            -
+            - kwargs: dictionary with the arguments needed to instantiate some of the classes.
         """
 
-        plclass_map = {"classifier_file", "imputeDistances", "distanciaMedia",
-                       "umbral_precision"," dist_mismo_lugar", "max_dist",
-                       "umbral_ratio_dCdP", "deltaO_medio"}
+        plclass_map = {"classifier_file"}
 
         kwargs_plclass = {}
         ##walk kwargs and, using plclass_map, build a new dictionary with the values that were passed in
         for key, value in kwargs.items():
             if key in plclass_map:
                 kwargs_plclass[key] = value
+
+        fmcreator_map = {"imputeDistances", "distanciaMedia", "umbral_precision",
+                         "dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio"}
+        fmcreator_kargs = {}
+        ##walk kwargs and, using fmcreator_map, build a new dictionary with the values that were passed in
+        for key, value in kwargs.items():
+            if key in fmcreator_map:
+                fmcreator_kargs[key] = value
 
         self._plantin_classifier = PlantinClassifier.PlantinClassifier(**kwargs_plclass)
+        self.plantinFMCreator = PlantinFMCreator.PlantinFMCreator(**fmcreator_kargs)
         # self._fertilizerFMCreator = FertilizerFMCreator() ## TO BE IMPLEMENTED
 
         self._operationsDict = {} ##dictionary of operators with their operations
@@ -45,7 +52,7 @@ class OpsProcessor():
         self.transformInputData = TransformInputData.TransformInputData()
         self.transformToOutputData = TransformToOutputData.TransformToOutputData()
 
-    def processOperations(self, data):
+    def processOperations(self, data, **kwargs):
         """Method to process the operators' operations.
 
         A new sample is taken and the information is processed to classify the operations considering the
@@ -68,6 +75,8 @@ class OpsProcessor():
                 "Precision": 1000,
                 "id_db_dw": 1 #int
             }
+
+            - kwargs: dictionary with the arguments needed for classification. It is used to pass arguments on to the classification methods.
 
         Returns:
             List of dictionaries with the classifications. Each dictionary has the form
@@ -79,7 +88,7 @@ class OpsProcessor():
         newSample = self.transformInputData.fit_transform(data)
         #If we have new operations, we update the operations dictionary
         self.updateOperationsDict(newSample) #update the class's internal dictionary
-        pl_clas = self.classifyForPlantin() #classify the operations for seedlings
+        pl_clas = self.classifyForPlantin(**kwargs) #classify the operations for seedlings
         ft_clas = newSample[:,7].astype(int) #classify the operations for fertilizer
         id_db_h_nums, id_db_dw_nums = self.getActualOperationsNumbers() #get the operation numbers from the operations dictionary
         date_oprc = newSample[:,3]
@@ -148,14 +157,27 @@ class OpsProcessor():
         self.updateNewSamplesValues(ID_NPDPs_newOperations) #update the 'new_sample' state in the operations dictionary
         self.updateLastOperations(ID_NPDPs_newOperations) #update the last operation of an operations sample in the operations dictionary
 
-    def classifyForPlantin(self):
+    def classifyForPlantin(self, **kwargs):
         """Method to classify the operations for seedlings.
         The operations dictionary is walked and the operations are classified for seedlings.
 
+        Args:
+            - kwargs: dictionary with the arguments needed for classification. It is used to pass arguments on to the classification methods.
+
         Returns:
             - plantinClassifications: np.array with the seedling classifications of the operations.
         """
 
+        key_classify_map = {"feature_matrix", "update_samePlace",
+                            "useRatioStats", "std_weight", "useDistancesStats",
+                            "ratio_dcdp_umbral", "dist_umbral"}
+
+        ##walk kwargs and, using key_classify_map, build a new dictionary with the values that were passed in
+        classify_kwargs = {}
+        for key, value in kwargs.items():
+            if key in key_classify_map:
+                classify_kwargs[key] = value
+
         ##create/reset the array with the seedling classifications of the operations
         plantinClassifications = None
 
@@ -165,7 +187,8 @@ class OpsProcessor():
         for ID_NPDP in ops_with_new_sample:#self.operationsDict.keys():
             ##classify the operations for seedlings
             operations = self.operationsDict[ID_NPDP]["sample_ops"]
-
+            features, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(operations)
+            classified_ops = self._plantin_classifier.classify(features, **classify_kwargs)
 
             ##check whether first_day_op_classified is True; if so, the first row of classified_ops is not considered
             if self.operationsDict[ID_NPDP]["first_day_op_classified"]:
@@ -270,17 +293,26 @@ if __name__ == "__main__":
     import pandas as pd
     import numpy as np
     import os
-
+    import sarapy.utils.getRawOperations as getRawOperations
+    from sarapy.dataProcessing import OpsProcessor
+
+    data_path = os.path.join(os.getcwd(), "examples\\2024-09-04\\UPM012N\\data.json")
+    historical_data_path = os.path.join(os.getcwd(), "examples\\2024-09-04\\UPM012N\\historical-data.json")
+
+    raw_data = pd.read_json(data_path, orient="records").to_dict(orient="records")
+    raw_data2 = pd.read_json(historical_data_path, orient="records").to_dict(orient="records")
 
-
+    raw_ops = getRawOperations.getRawOperations(raw_data, raw_data2)
 
     import time
     start_time = time.time()
-    op = OpsProcessor(classifier_file=
-
+    op = OpsProcessor.OpsProcessor(classifier_file='modelos\\pipeline_rf.pkl', imputeDistances = False)
+    classifications = op.processOperations(raw_ops, update_samePlace=True, useRatioStats=True)
     end_time = time.time()
     execution_time = end_time - start_time
     print("Execution time:", execution_time, "seconds")
 
-
-
+    ##
+    df = pd.DataFrame(classifications)
+    tag_seedling = df["tag_seedling"].values
+    print(tag_seedling.mean())
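Note: the key change above is that OpsProcessor now owns the PlantinFMCreator and routes its **kwargs to two consumers by filtering against fixed key sets. A minimal sketch of that routing idiom follows; the key sets and names are taken from the diff, while the dict-comprehension form and the sample kwargs dict are illustrative, not the shipped code:

```python
# Illustrative sketch of the kwargs routing added in OpsProcessor.__init__.
plclass_map = {"classifier_file"}
fmcreator_map = {"imputeDistances", "distanciaMedia", "umbral_precision",
                 "dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio"}

kwargs = {"classifier_file": "modelos/pipeline_rf.pkl", "imputeDistances": False}

# Equivalent to the two for-loops in the diff above:
kwargs_plclass = {k: v for k, v in kwargs.items() if k in plclass_map}
fmcreator_kargs = {k: v for k, v in kwargs.items() if k in fmcreator_map}

print(kwargs_plclass)   # {'classifier_file': 'modelos/pipeline_rf.pkl'}
print(fmcreator_kargs)  # {'imputeDistances': False}
```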
--- sarapy-1.0.1/sarapy/dataProcessing/TimeSeriesProcessor.py
+++ sarapy-1.1.1/sarapy/dataProcessing/TimeSeriesProcessor.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin
-import warnings
 
 class TimeSeriesProcessor(BaseEstimator, TransformerMixin):
     """"
@@ -36,7 +35,7 @@ class TimeSeriesProcessor(BaseEstimator, TransformerMixin):
             self._deltaO = np.diff(X[:,0])
             self._deltaP = X[:,1]
             self._deltaC = self._deltaO - self._deltaP[1:]
-            ##prepend a 0 to deltaO and deltaC
+            ##prepend a 0 to deltaO and deltaC
             self._deltaO = np.insert(self._deltaO, 0, 0)
             self._deltaC = np.insert(self._deltaC, 0, 0)
             ##compute the ratio between deltaC and deltaP. We use np.vectorize so that the ratio is computed for each element of the array
@@ -44,10 +43,6 @@ class TimeSeriesProcessor(BaseEstimator, TransformerMixin):
             ##change the first value of ratio_dCdP to 1
             self._ratio_dCdP[0] = 1
 
-            ##version 0.2.5
-            # self._deltaO = np.hstack((self._deltaO, 0))
-            # self._deltaC = np.hstack((self._deltaC, 0))
-
         elif X.shape[0] == 1:
             self._deltaO = np.array([0])
             self._deltaC = np.array([0])
@@ -73,12 +68,15 @@ class TimeSeriesProcessor(BaseEstimator, TransformerMixin):
     def fit_transform(self, X: np.array, y=None):
         self.fit(X)
         return self.transform(X)
-
+
     def compute_ratio_dCdP(self, deltaC, deltaP):
         """Returns the ratio between the walking time and the open-beak time."""
-
-
-
+
+        numerator = deltaC - deltaP
+        denominator = deltaC + deltaP
+        ##replace 0 values in the denominator with 1
+        denominator[denominator == 0] = 1
+        return numerator/denominator
 
     @property
     def deltaO(self):
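Note: per the 1.1.1 changelog, the np.vectorize version of compute_ratio_dCdP was replaced by plain element-wise array arithmetic, as added above. A small self-contained check of the new logic (the input arrays are made up for illustration):

```python
import numpy as np

# Made-up deltas; deltaC + deltaP == 0 in the first slot exercises the guard.
deltaC = np.array([0.0, 4.0, 2.0])
deltaP = np.array([0.0, 2.0, 2.0])

numerator = deltaC - deltaP
denominator = deltaC + deltaP
denominator[denominator == 0] = 1  # avoid division by zero, as in the new code
print(numerator / denominator)     # [0.         0.33333333 0.        ]
```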
--- /dev/null
+++ sarapy-1.1.1/sarapy/mlProcessors/PlantinClassifier.py
@@ -0,0 +1,145 @@
+###Documentation at https://github.com/lucasbaldezzari/sarapy/blob/main/docs/Docs.md
+import numpy as np
+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.pipeline import Pipeline
+from sarapy.mlProcessors import PlantinFMCreator
+import pickle
+
+class PlantinClassifier(BaseEstimator, TransformerMixin):
+    """Class implementing the data-processing pipeline for classifying the operation type for seedlings."""
+
+    def __init__(self, classifier_file = ""):
+        """Constructor of the PlantinClassifier class.
+
+        Args:
+            - classifier_file: string with the name of the file containing the trained classifier. The file to load is a .pkl file.
+        """
+
+        #load the classifier with pickle. We use try to catch the FileNotFoundError error
+        try:
+            with open(classifier_file, 'rb') as file:
+                self._pipeline = pickle.load(file)
+            print("Clasificador cargado con éxito.")
+        except FileNotFoundError:
+            print("El archivo no se encuentra en el directorio actual.")
+
+    def classify(self, feature_matrix, update_samePlace:bool = True, **kwargs):
+        """Generates the classification of the operations for seedlings.
+
+        feature_matrix: array with the data (strings) coming from the historical database.
+        The shape of newData must be (n,3). The columns of newData must be,
+            - 1: deltaO
+            - 2: ratio_dCdP
+            - 3: distances
+
+        kwargs: dictionary with the arguments needed for the classification.
+
+        NOTE: these features are required by the latest version of the classification model.
+        """
+
+        self.clasificaiones = self._pipeline.predict(feature_matrix)
+
+        if update_samePlace:
+            self.grouped_ops = self.groupOpsSamePlace(feature_matrix, **kwargs)
+            self.clasificaiones = self.updateLabelsSamePlace(self.clasificaiones, self.grouped_ops)
+
+        return self.clasificaiones
+
+    def groupOpsSamePlace(self, X, useRatioStats = True, std_weight=1, useDistancesStats = True,
+                          ratio_dcdp_umbral=0.1, dist_umbral=0.5):
+        """
+        Function that groups the operations carried out in the same place, or that are cleaning operations.
+        Operations in the same place are understood to be operations whose distances to one another are below 0.5.
+        The function takes the operations with distances below 0.5 plus the previous operation, since the previous
+        operation is assumed to correspond to a new planting site.
+
+        Cleaning operations are those with a ratio_dCdP below 0.3
+
+        Args:
+            - X: array with the operation features. The columns are deltaO, ratio_dCdP and distances.
+            - useRatioStats: boolean to use the statistics or not. Defaults to True.
+            - std_weight: weight for the standard deviation. Defaults to 1.
+            - ratio_dcdp_umbral: threshold for ratio_dCdP. Defaults to 0.1.
+            - dist_umbral: threshold for the distance (in metres). Defaults to 0.5.
+
+        Returns:
+            - A list with the indices of the grouped operations.
+        """
+
+        if useRatioStats:
+            median_ratio_dcdp = np.median(X[:,1])
+            std_ratio_dcdp = np.std(X[:,1])
+            ratio_dcdp_umbral = median_ratio_dcdp - std_weight*std_ratio_dcdp
+
+        if useDistancesStats:
+            median_dist = np.median(X[:,2])
+            # std_dist = np.std(X[:,2])
+            dist_umbral = median_dist #- std_weight*std_dist
+
+        ##walk the operations comparing the current one with the next. If the distance is below 0.5, group it.
+        ##If the ratio_dCdP is below 0.3, group it.
+        grouped_ops = []
+        distancias = X[:,2]
+        ratio_dcdp = X[:,1]
+        flag_cleaning = True
+        for i in range(1,X.shape[0]):
+            if flag_cleaning:
+                sub_group = []
+            if distancias[i] < dist_umbral and ratio_dcdp[i] < ratio_dcdp_umbral:
+                flag_cleaning = False
+                sub_group.append(i-1)
+                sub_group.append(i)
+            else:
+                flag_cleaning = True
+                if len(sub_group) > 0:
+                    grouped_ops.append(sub_group)
+
+        ##walk grouped_ops, removing the elements repeated within each subgroup and sorting the indices within each subgroup
+        for i in range(len(grouped_ops)):
+            grouped_ops[i] = list(set(grouped_ops[i]))
+            grouped_ops[i].sort()
+
+        return grouped_ops
+
+    def updateLabelsSamePlace(self, labels, ops_grouped):
+        """
+        Function to update the labels of the operations grouped in the same place.
+
+        Args:
+            - labels: array with the operation labels.
+            - indexes: array with the indices corresponding to repeated operations
+        """
+        new_labels = labels.copy()
+        for indexes in ops_grouped:
+            new_labels[indexes[0]] = 1
+            new_labels[indexes[1:]] = 0
+
+        return new_labels
+
+if __name__ == "__main__":
+    import os
+    import pandas as pd
+    import numpy as np
+    from sarapy.preprocessing import TransformInputData
+    from sarapy.mlProcessors import PlantinFMCreator
+    import sarapy.utils.getRawOperations as getRawOperations
+    from sarapy.mlProcessors import PlantinClassifier
+
+    fmcreator = PlantinFMCreator.PlantinFMCreator(imputeDistances=False)
+    tindata = TransformInputData.TransformInputData()
+
+    data_path = os.path.join(os.getcwd(), "examples\\2024-09-04\\UPM011N\\data.json")
+    historical_data_path = os.path.join(os.getcwd(), "examples\\2024-09-04\\UPM011N\\historical-data.json")
+    raw_data = pd.read_json(data_path, orient="records").to_dict(orient="records")
+    raw_data2 = pd.read_json(historical_data_path, orient="records").to_dict(orient="records")
+
+    raw_ops = np.array(getRawOperations.getRawOperations(raw_data, raw_data2))
+    raw_X = tindata.fit_transform(raw_ops)[:,2:]
+
+    X, dst_pt, inest_pt = fmcreator.fit_transform(raw_X)
+
+    rf_clf_nu = PlantinClassifier.PlantinClassifier(classifier_file='modelos\\pipeline_rf.pkl') ##nu = no update
+    rf_clf_wu = PlantinClassifier.PlantinClassifier(classifier_file='modelos\\pipeline_rf.pkl') ##wu = with update
+
+    print(rf_clf_nu.classify(X, update_samePlace = False).mean())
+    print(rf_clf_wu.classify(X, update_samePlace=True, useRatioStats=True, useDistancesStats=True).mean())
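Note: the 1.1.1 classifier adds a post-processing step on top of the pipeline's predictions: groupOpsSamePlace collects runs of operations below both the distance and ratio_dCdP thresholds (plus the operation preceding each run), and updateLabelsSamePlace keeps only the first label of each run. A standalone trace of that grouping pass with a made-up feature matrix (fixed thresholds, stats options off; this is a sketch mirroring the loop above, not the shipped class):

```python
import numpy as np

# Columns: deltaO, ratio_dCdP, distances (values are made up).
X = np.array([[10.0, 0.90, 3.0],
              [ 9.0, 0.05, 0.2],   # below both thresholds: grouped with row 0
              [11.0, 0.04, 0.3],   # still below: extends the same group
              [10.0, 0.85, 2.5]])  # above thresholds: closes the group
ratio_dcdp_umbral, dist_umbral = 0.1, 0.5

grouped_ops, sub_group = [], []
flag_cleaning = True
for i in range(1, X.shape[0]):          # same pass as groupOpsSamePlace
    if flag_cleaning:
        sub_group = []
    if X[i, 2] < dist_umbral and X[i, 1] < ratio_dcdp_umbral:
        flag_cleaning = False
        sub_group += [i - 1, i]
    else:
        flag_cleaning = True
        if sub_group:
            grouped_ops.append(sorted(set(sub_group)))
print(grouped_ops)                      # [[0, 1, 2]]

labels = np.array([1, 1, 1, 1])         # stand-in pipeline predictions
for indexes in grouped_ops:             # updateLabelsSamePlace
    labels[indexes[0]] = 1              # keep the first operation at the site
    labels[indexes[1:]] = 0             # zero out the repeats
print(labels)                           # [1 0 0 1]
```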
--- sarapy-1.0.1/sarapy/mlProcessors/PlantinFMCreator.py
+++ sarapy-1.1.1/sarapy/mlProcessors/PlantinFMCreator.py
@@ -4,7 +4,6 @@ import warnings
 import numpy as np
 from sklearn.base import BaseEstimator, TransformerMixin
 from sarapy.dataProcessing import TLMSensorDataProcessor, TimeSeriesProcessor, GeoProcessor
-from sarapy.preprocessing import DistancesImputer
 
 class PlantinFMCreator(BaseEstimator, TransformerMixin):
     """The FMCreator class is in charge of creating the Feature Matrix (FM) from the telemetry data. The TLMSensorDataExtractor, TimeSeriesProcessor and GeoProcessor classes are used to perform the necessary transformations.
@@ -69,11 +68,9 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         - 4: GPS precision
 
         Returns:
-
-            -
-            -
-            - 2: ratio_dCdP: ratio between the walking delta and the open-beak delta
-            - 3: distances: distances between operations
+            - 0: feature_matrix: (deltaO, ratio_dCdP, distances)
+            - 1: dst_pt
+            - 2: inest_pt
         """
 
         if not self.is_fitted:
@@ -88,7 +85,7 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         date_oprc = X[:,1].astype(int) #operation date-and-time data
         lats = X[:,2].astype(float) #latitudes of the operations
         longs = X[:,3].astype(float) #longitudes of the operations
-        precitions = X[:,4].astype(float) #GPS precision
+        # precitions = X[:,4].astype(float) #GPS precision
 
         ##***** GET THE DATA TO FIT THE OBJECTS AND SO PROCESS THE FM *****
         ##get the positions of the data from tlmDataExtractor and timeProcessor
@@ -106,32 +103,16 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         ##generate an array of points of shape (n,2)
         points = np.hstack((lats.reshape(-1,1),longs.reshape(-1,1)))
         self._distances = geoprocessor.fit_transform(points)
-
-
-
-        distanceimputer = DistancesImputer.DistancesImputer(distanciaMedia = self.distanciaMedia,
-                                                            umbral_precision = self.umbral_precision,
-                                                            dist_mismo_lugar = self.dist_mismo_lugar,
-                                                            max_dist = self.max_dist,
-                                                            umbral_ratio_dCdP = self.umbral_ratio_dCdP,
-                                                            deltaO_medio = self.deltaO_medio, keepDims = False, columnToImpute = 0)
-
-        X_distance_imputation = np.hstack((self._distances.reshape(-1, 1),
-                                           precitions.reshape(-1, 1),
-                                           self._tlmExtracted[:,self._tlmdeDP["GNSSFlag"]].reshape(-1, 1),
-                                           self._tlmExtracted[:,self._tlmdeDP["FIX"]].reshape(-1, 1),
-                                           self._timeDeltas[:,self._tpDP["deltaO"]].reshape(-1, 1),
-                                           self._timeDeltas[:,self._tpDP["ratio_dCdP"]].reshape(-1, 1)))
-
-        self._distances = distanceimputer.fit_transform(X_distance_imputation)
+
+        self.dst_pt = self._tlmExtracted[:,self._tlmdeDP["DSTRPT"]]
+        self.inest_pt = self._tlmExtracted[:,self._tlmdeDP["INESTPT"]]
 
         ##assemble the feature matrix
-        featureMatrix = np.vstack((self._timeDeltas[:,self._tpDP["deltaO"]],
-                                   self._timeDeltas[:,self._tpDP["ratio_dCdP"]],
-                                   self._distances)).T
+        self.featureMatrix = np.vstack((self._timeDeltas[:,self._tpDP["deltaO"]],
+                                        self._timeDeltas[:,self._tpDP["ratio_dCdP"]],
+                                        self._distances)).T
 
-        return featureMatrix
+        return self.featureMatrix, self.dst_pt, self.inest_pt
 
     def fit_transform(self, X: np.array, y=None):
         """Fits and transforms the data in X into the feature matrix.
@@ -145,11 +126,9 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
         - 4: GPS precision
 
         Returns:
-
-            -
-            -
-            - 2: ratio_dCdP: ratio between the walking delta and the open-beak delta
-            - 3: distances: distances between operations
+            - 0: feature_matrix: (deltaO, ratio_dCdP, distances)
+            - 1: dst_pt
+            - 2: inest_pt
         """
         self.fit(X)
         return self.transform(X)
@@ -176,18 +155,23 @@ class PlantinFMCreator(BaseEstimator, TransformerMixin):
 
 
 if __name__ == "__main__":
-    ##create the FMCreator object
-    fmcreator = PlantinFMCreator(imputeDistances=False)
-    import pandas as pd
     import os
-
-
+    import pandas as pd
+    import numpy as np
+    from sarapy.preprocessing import TransformInputData
+    from sarapy.mlProcessors import PlantinFMCreator
+    import sarapy.utils.getRawOperations as getRawOperations
 
-
-
-
-
-
-
-
-
+    fmcreator = PlantinFMCreator.PlantinFMCreator(imputeDistances=False)
+    tindata = TransformInputData.TransformInputData()
+
+    ##load the files examples\2024-09-04\UPM001N\data.json and examples\2024-09-04\UPM001N\historical-data.json
+    data_path = os.path.join(os.getcwd(), "examples\\2024-09-04\\UPM001N\\data.json")
+    historical_data_path = os.path.join(os.getcwd(), "examples\\2024-09-04\\UPM001N\\historical-data.json")
+    raw_data = pd.read_json(data_path, orient="records").to_dict(orient="records")
+    raw_data2 = pd.read_json(historical_data_path, orient="records").to_dict(orient="records")
+
+    raw_ops = np.array(getRawOperations.getRawOperations(raw_data, raw_data2))
+    X = tindata.fit_transform(raw_ops)
+
+    fm, dst_pt, inest_pt = fmcreator.fit_transform(X[:,2:])
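Note: PlantinFMCreator.transform()/fit_transform() now return a three-element tuple instead of a bare matrix, so 1.0.1-style call sites must unpack the result. A minimal sketch of the migration (the `fmcreator` and `raw_X` names are placeholders, not real variables from the package):

```python
# 1.0.1 call site:  feature_matrix = fmcreator.fit_transform(raw_X)
# 1.1.1 call site: the result is now (feature_matrix, dst_pt, inest_pt)
feature_matrix, dst_pt, inest_pt = fmcreator.fit_transform(raw_X)
```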
--- sarapy-1.0.1/sarapy/preprocessing/TransformInputData.py
+++ sarapy-1.1.1/sarapy/preprocessing/TransformInputData.py
@@ -100,7 +100,11 @@ if __name__ == "__main__":
 
     transform_input_data = TransformInputData()
 
-
+    #load "examples\\2024-05-30\\UPM007N\\data.json"
+    data = pd.read_json("examples\\2024-05-30\\UPM007N\\data.json").to_dict(orient="records")
+    historical_data = pd.read_json("examples\\2024-05-30\\UPM007N\\historical-data.json").to_dict(orient="records")
+
+    ppk_results = getRawOperations(data,historical_data)
 
     X = np.array(ppk_results)
     print(transform_input_data.fit_transform(X))
--- sarapy-1.0.1/sarapy/preprocessing/TransformToOutputData.py
+++ sarapy-1.1.1/sarapy/preprocessing/TransformToOutputData.py
@@ -8,7 +8,7 @@ class TransformToOutputData(BaseEstimator, TransformerMixin):
 
     Args:
         - dataToTransform: array with the data of the classified operations.
-        Currently the dataToTransform array is (n,
+        Currently the dataToTransform array is (n,5) with the following columns
 
         - 0: id_db_h
         - 1: id_db_dw
@@ -36,6 +36,15 @@ class TransformToOutputData(BaseEstimator, TransformerMixin):
 
     def fit(self, X:np.array, y = None):
         """
+        Args:
+            - X: array with the data of the classified operations.
+            Currently the dataToTransform array is (n,5) with the following columns
+
+            - 0: id_db_h
+            - 1: id_db_dw
+            - 2: tag_seedling
+            - 3: tag_fertilizer
+            - 4: date_oprc
         """
         self.is_fitted = True
         keys = ["id_db_h", "id_db_dw", "tag_seedling", "tag_fertilizer", "date_oprc"]
@@ -45,19 +54,41 @@ class TransformToOutputData(BaseEstimator, TransformerMixin):
         date_oprc = np.array([datetime.datetime.fromtimestamp(date, datetime.timezone.utc) for date in date_data])
         self.temp_df.loc[:,"date_oprc"] = date_oprc.flatten()
         ##convert the "id_db_h", "id_db_dw", "tag_seedling", "tag_fertilizer" columns to int
-
-
+        for col in ["id_db_h", "id_db_dw", "tag_seedling", "tag_fertilizer"]:
+            self.temp_df[col] = self.temp_df[col].astype(float).astype(int)
+
         return self
 
     def transform(self, X:np.array):
         """
-
+        Args:
+            - X: array with the data of the classified operations.
+            Currently the dataToTransform array is (n,5) with the following columns
+
+            - 0: id_db_h
+            - 1: id_db_dw
+            - 2: tag_seedling
+            - 3: tag_fertilizer
+            - 4: date_oprc
+        Returns:
+            Returns a list of dictionaries where each dictionary contains the data of one operation for the fields mentioned above.
         """
 
         return self.temp_df.to_dict(orient = "records")
 
     def fit_transform(self, X:np.array, y = None):
         """
+        Args:
+            - X: array with the data of the classified operations.
+            Currently the dataToTransform array is (n,5) with the following columns
+
+            - 0: id_db_h
+            - 1: id_db_dw
+            - 2: tag_seedling
+            - 3: tag_fertilizer
+            - 4: date_oprc
+        Returns:
+            Returns a list of dictionaries where each dictionary contains the data of one operation for the fields mentioned above.
         """
         self.fit(X)
         return self.transform(X)
--- /dev/null
+++ sarapy-1.1.1/sarapy/utils/getRawOperations.py
@@ -0,0 +1,20 @@
+import pandas as pd
+from sarapy.utils import amg_ppk
+import os
+def getRawOperations(data, historical_data):
+    """
+    Args:
+        data: list of dictionaries with the data
+        historical_data: list of dictionaries with the historical data
+
+    Returns the raw operations from the database.
+    """
+    hash_table = {}
+    for datum in data:
+        hash_table[datum["timestamp"]] = {"id_db_dw": datum["id"], "id_db_h": 0, "serialized_datum": ""}
+    for historical_datum in historical_data:
+        if historical_datum["timestamp"] in hash_table:
+            hash_table[historical_datum["timestamp"]].update({"id_db_h": historical_datum["id"], "serialized_datum": historical_datum["datum"]})
+    ppk_results = amg_ppk.main(hash_table, []) # ToDo: PPK (Fernando)
+
+    return ppk_results
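Note: the new getRawOperations takes already-loaded lists of dicts (instead of reading hard-coded example files, as the removed 1.0.1 version at the end of this diff did) and joins them by timestamp before handing the table to amg_ppk.main. The join itself, run on toy records (the field values are made up and the amg_ppk step is omitted):

```python
# Toy inputs mirroring the structure expected by getRawOperations.
data = [{"timestamp": 100, "id": 7}, {"timestamp": 200, "id": 8}]
historical_data = [{"timestamp": 100, "id": 3, "datum": "AA;BB"}]

hash_table = {}
for datum in data:
    hash_table[datum["timestamp"]] = {"id_db_dw": datum["id"], "id_db_h": 0, "serialized_datum": ""}
for h in historical_data:
    if h["timestamp"] in hash_table:   # only timestamps present in `data` are enriched
        hash_table[h["timestamp"]].update({"id_db_h": h["id"], "serialized_datum": h["datum"]})

print(hash_table[100])  # {'id_db_dw': 7, 'id_db_h': 3, 'serialized_datum': 'AA;BB'}
print(hash_table[200])  # {'id_db_dw': 8, 'id_db_h': 0, 'serialized_datum': ''}
```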
--- sarapy-1.0.1/sarapy/version.py
+++ sarapy-1.1.1/sarapy/version.py
@@ -1,2 +1,2 @@
 ## Version of the package
-__version__ = "1.0.1"
+__version__ = "1.1.1"
--- sarapy-1.0.1/PKG-INFO
+++ sarapy-1.1.1/sarapy.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sarapy
-Version: 1.0.1
+Version: 1.1.1
 Home-page: https://github.com/lucasbaldezzari/sarapy
 Author: Lucas Baldezzari
 Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
@@ -19,6 +19,17 @@ Requires-Dist: geopy
 
 Library for processing SARAPICO project metadata of _AMG_.
 
+#### Version 1.1.1
+
+- TimeSeriesProcessor.compute_ratio_dCdP() is reworked, since the np.vectorize-based version apparently did not work correctly in certain cases.
+
+#### Version 1.1.0
+
+Stable 1.1 version for use on the server.
+
+- A new strategy for classifying seedlings is implemented.
+
+
 #### Version 1.0.1
 
 - *__init__.py* is added inside the _utils_ module.
--- sarapy-1.0.1/sarapy/mlProcessors/PlantinClassifier.py
+++ /dev/null
@@ -1,71 +0,0 @@
-###Documentation at https://github.com/lucasbaldezzari/sarapy/blob/main/docs/Docs.md
-import numpy as np
-from sklearn.base import BaseEstimator, TransformerMixin
-from sklearn.pipeline import Pipeline
-from sarapy.mlProcessors import PlantinFMCreator
-import pickle
-
-class PlantinClassifier(BaseEstimator, TransformerMixin):
-    """Class implementing the data-processing pipeline for classifying the operation type for seedlings."""
-
-    def __init__(self, classifier_file = "", **kwargs):
-        """Constructor of the PlantinClassifier class.
-
-        Args:
-            - classifier_file: string with the name of the file containing the trained classifier. The file to load is a .pkl file.
-        """
-
-        plclass_map = {"imputeDistances", "distanciaMedia", "umbral_precision"," dist_mismo_lugar", "max_dist",
-                       "umbral_ratio_dCdP", "deltaO_medio"}
-
-        kwargs_plfmc = {}
-
-        ##walk kwargs and, using plclass_map, build a new dictionary with the values that were passed in
-        for key, value in kwargs.items():
-            if key in plclass_map:
-                kwargs_plfmc[key] = value
-
-        self._plantinFMCreator = PlantinFMCreator.PlantinFMCreator(**kwargs_plfmc)
-        #load the classifier with pickle. We use try to catch the FileNotFoundError error
-        try:
-            with open(classifier_file, 'rb') as file:
-                self._pipeline = pickle.load(file)
-        except FileNotFoundError:
-            print("El archivo no se encuentra en el directorio actual.")
-
-    def classify(self, newData):
-        """Generates the classification of the operations for seedlings.
-
-        newData: array with the data (strings) coming from the historical database. The shape of newData must be (n,4). The columns of newData must be,
-            - 0: tlm_spbb, the telemetry data.
-            - 1: date_oprc, the operation date-and-time data.
-            - 2: latitude of the operation
-            - 3: longitude of the operation
-            - 4: GPS precision
-        """
-        feature_matrix = self._plantinFMCreator.fit_transform(newData)
-        return self._pipeline.predict(feature_matrix)
-
-if __name__ == "__main__":
-    from sarapy.dataProcessing import OpsProcessor
-
-    #load the file examples\volcado_17112023_NODE_processed.csv
-    import pandas as pd
-    import os
-    path = os.path.join(os.getcwd(), "examples\\volcado_17112023_NODE_processed.csv")
-    data_df = pd.read_csv(path, sep=";", )
-    raw_data = data_df.to_numpy().astype(str)
-
-    ##take raw_data and draw samples of 7 to 15 consecutive rows. The value between 7 and 15 is random.
-    sample = []
-    index = 0
-    while True:
-        random_value = np.random.randint(8, 15)
-        if index + random_value < len(raw_data):
-            sample.append(raw_data[index:index+random_value])
-        else:
-            break
-        index += random_value
-
-    plantin_classifier = PlantinClassifier(classifier_file="examples\\pip_lda_imp.pkl",imputeDistances = False)
-    plantin_classifier.classify(sample[50][:,2:])
--- sarapy-1.0.1/sarapy/utils/getRawOperations.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import pandas as pd
-from sarapy.utils import amg_ppk
-import os
-def getRawOperations(data_file, historical_data_file):
-    """
-    Args:
-        data_file: Path to the file with the data.
-        historical_data_file: Path to the file with the historical data.
-    Returns the raw operations from the database.
-    """
-    #load examples\2024-05-30\UPM007N\data.json
-    data = pd.read_json(os.path.join("examples","2024-05-30","UPM007N","data.json"))
-    historical_data = pd.read_json(os.path.join("examples","2024-05-30","UPM007N","historical-data.json"))
-    #convert to a list of dictionaries
-    data=data.to_dict(orient="records")
-    historical_data=historical_data.to_dict(orient="records")
-    hash_table = {}
-    for datum in data:
-        hash_table[datum["timestamp"]] = {"id_db_dw": datum["id"], "id_db_h": 0, "serialized_datum": ""}
-    for historical_datum in historical_data:
-        if historical_datum["timestamp"] in hash_table:
-            hash_table[historical_datum["timestamp"]].update({"id_db_h": historical_datum["id"], "serialized_datum": historical_datum["datum"]})
-    ppk_results = amg_ppk.main(hash_table, []) # ToDo: PPK (Fernando)
-
-    return ppk_results
The remaining files listed above with +0 -0 are unchanged between 1.0.1 and 1.1.1.