sarapy 2.3.0__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sarapy-2.3.0 → sarapy-3.1.0}/PKG-INFO +64 -12
- sarapy-2.3.0/sarapy.egg-info/PKG-INFO → sarapy-3.1.0/README.md +39 -17
- sarapy-3.1.0/pyproject.toml +47 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/analysis/FeaturesResume.py +151 -47
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/dataProcessing/OpsProcessor.py +47 -32
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/dataProcessing/TLMSensorDataProcessor.py +5 -2
- sarapy-3.1.0/sarapy/mlProcessors/FertilizerTransformer.py +198 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/mlProcessors/PlantinClassifier.py +65 -23
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/mlProcessors/PlantinFMCreator.py +25 -12
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/preprocessing/TransformInputData.py +3 -2
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/version.py +1 -1
- sarapy-2.3.0/README.md → sarapy-3.1.0/sarapy.egg-info/PKG-INFO +69 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy.egg-info/SOURCES.txt +4 -1
- sarapy-3.1.0/sarapy.egg-info/requires.txt +15 -0
- sarapy-3.1.0/sarapy.egg-info/top_level.txt +5 -0
- sarapy-3.1.0/setup.py +40 -0
- sarapy-3.1.0/test/checking_regresor.py +162 -0
- sarapy-3.1.0/test/probabilidades_test.py +77 -0
- sarapy-3.1.0/test/test_import.py +5 -0
- sarapy-2.3.0/pyproject.toml +0 -21
- sarapy-2.3.0/sarapy/mlProcessors/FertilizerTransformer.py +0 -68
- sarapy-2.3.0/sarapy.egg-info/requires.txt +0 -6
- sarapy-2.3.0/sarapy.egg-info/top_level.txt +0 -1
- sarapy-2.3.0/setup.py +0 -35
- {sarapy-2.3.0 → sarapy-3.1.0}/LICENCE +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/analysis/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/dataProcessing/GeoProcessor.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/dataProcessing/TimeSeriesProcessor.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/dataProcessing/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/mlProcessors/FertilizerFMCreator.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/mlProcessors/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/preprocessing/DistancesImputer.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/preprocessing/FertilizerImputer.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/preprocessing/TransformToOutputData.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/preprocessing/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/stats/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/stats/stats.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/utils/__init__.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/utils/plotting.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy/utils/utils.py +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/sarapy.egg-info/dependency_links.txt +0 -0
- {sarapy-2.3.0 → sarapy-3.1.0}/setup.cfg +0 -0
|
@@ -1,24 +1,76 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: sarapy
|
|
3
|
-
Version:
|
|
4
|
-
|
|
5
|
-
Author: Lucas Baldezzari
|
|
3
|
+
Version: 3.1.0
|
|
4
|
+
Summary: Library for Sarapico Metadata processing
|
|
6
5
|
Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 10
|
|
10
|
+
Classifier: Operating System :: Microsoft :: Windows :: Windows 11
|
|
11
|
+
Classifier: Operating System :: Unix
|
|
12
|
+
Requires-Python: >=3.9
|
|
9
13
|
Description-Content-Type: text/markdown
|
|
10
14
|
License-File: LICENCE
|
|
11
|
-
Requires-Dist: numpy
|
|
12
|
-
Requires-Dist:
|
|
13
|
-
Requires-Dist:
|
|
14
|
-
Requires-Dist:
|
|
15
|
-
Requires-Dist:
|
|
16
|
-
Requires-Dist:
|
|
15
|
+
Requires-Dist: numpy>=1.23
|
|
16
|
+
Requires-Dist: pandas>=1.5
|
|
17
|
+
Requires-Dist: scipy>=1.9
|
|
18
|
+
Requires-Dist: scikit-learn>=1.2
|
|
19
|
+
Requires-Dist: matplotlib>=3.6
|
|
20
|
+
Requires-Dist: seaborn>=0.12
|
|
21
|
+
Requires-Dist: requests>=2.28
|
|
22
|
+
Requires-Dist: python-dotenv>=1.0
|
|
23
|
+
Requires-Dist: geopy>=2.3
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest; extra == "dev"
|
|
26
|
+
Requires-Dist: black; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff; extra == "dev"
|
|
28
|
+
Requires-Dist: mypy; extra == "dev"
|
|
29
|
+
Dynamic: license-file
|
|
17
30
|
|
|
18
31
|
# SARAPY
|
|
19
32
|
|
|
20
33
|
Library for processing SARAPICO project metadata of _AMG SA_.
|
|
21
34
|
|
|
35
|
+
#### Version 3.1.0
|
|
36
|
+
|
|
37
|
+
- Se actualiza regresor para estimar fertilizante.
|
|
38
|
+
- Actualización de archivos para instalar la librería.
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
#### Version 3.0.0
|
|
42
|
+
- Se mejora la forma de obtener valores de media móvil para todas las variables en las que se usa.
|
|
43
|
+
- Se corrigen bugs debido a nodos con pocas operaciones.
|
|
44
|
+
- Se corrigen errores a la hora de pasar parámetros a los métodos de algunas clases.
|
|
45
|
+
- Se configuran parámetros de fmcreator y plantin_classifier para el reetiquetado, los mismos son:
|
|
46
|
+
|
|
47
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
48
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
49
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
50
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
51
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
##argumentos del método PlantinClassifier.classify()
|
|
55
|
+
kwargs_classifier = {"proba_threshold":0.4,
|
|
56
|
+
"use_proba_ma":False,
|
|
57
|
+
"proba_ma_window":10,
|
|
58
|
+
"update_samePlace":True,
|
|
59
|
+
"update_dstpt":True,
|
|
60
|
+
"umbral_proba_dstpt":0.5,
|
|
61
|
+
"umbral_bajo_dstpt":1.5,
|
|
62
|
+
"use_ma":True,
|
|
63
|
+
"dstpt_ma_window":62,
|
|
64
|
+
"use_min_dstpt":False,
|
|
65
|
+
"factor":0.1,
|
|
66
|
+
|
|
67
|
+
"useRatioStats":False,
|
|
68
|
+
"std_weight":1.,
|
|
69
|
+
"useDistancesStats":False,
|
|
70
|
+
"ratio_dcdp_umbral":0.1,
|
|
71
|
+
"dist_umbral":0.5,
|
|
72
|
+
}
|
|
73
|
+
|
|
22
74
|
#### Version 2.3.0
|
|
23
75
|
|
|
24
76
|
- Se agregan funcionalidades.
|
|
@@ -1,24 +1,46 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: sarapy
|
|
3
|
-
Version: 2.3.0
|
|
4
|
-
Home-page: https://github.com/lucasbaldezzari/sarapy
|
|
5
|
-
Author: Lucas Baldezzari
|
|
6
|
-
Author-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
|
|
7
|
-
Maintainer-email: Lucas Baldezzari <lmbaldezzari@gmail.com>
|
|
8
|
-
License: For private use only. Owner AMG Servicios profesionales (Mercedes, Uruguay)
|
|
9
|
-
Description-Content-Type: text/markdown
|
|
10
|
-
License-File: LICENCE
|
|
11
|
-
Requires-Dist: numpy
|
|
12
|
-
Requires-Dist: matplotlib
|
|
13
|
-
Requires-Dist: pandas
|
|
14
|
-
Requires-Dist: scipy
|
|
15
|
-
Requires-Dist: scikit-learn
|
|
16
|
-
Requires-Dist: geopy
|
|
17
|
-
|
|
18
1
|
# SARAPY
|
|
19
2
|
|
|
20
3
|
Library for processing SARAPICO project metadata of _AMG SA_.
|
|
21
4
|
|
|
5
|
+
#### Version 3.1.0
|
|
6
|
+
|
|
7
|
+
- Se actualiza regresor para estimar fertilizante.
|
|
8
|
+
- Actualización de archivos para instalar la librería.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
#### Version 3.0.0
|
|
12
|
+
- Se mejora la forma de obtener valores de media móvil para todas las variables en las que se usa.
|
|
13
|
+
- Se corrigen bugs debido a nodos con pocas operaciones.
|
|
14
|
+
- Se corrigen errores a la hora de pasar parámetros a los métodos de algunas clases.
|
|
15
|
+
- Se configuran parámetros de fmcreator y plantin_classifier para el reetiquetado, los mismos son:
|
|
16
|
+
|
|
17
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
18
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
19
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
20
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
21
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
##argumentos del método PlantinClassifier.classify()
|
|
25
|
+
kwargs_classifier = {"proba_threshold":0.4,
|
|
26
|
+
"use_proba_ma":False,
|
|
27
|
+
"proba_ma_window":10,
|
|
28
|
+
"update_samePlace":True,
|
|
29
|
+
"update_dstpt":True,
|
|
30
|
+
"umbral_proba_dstpt":0.5,
|
|
31
|
+
"umbral_bajo_dstpt":1.5,
|
|
32
|
+
"use_ma":True,
|
|
33
|
+
"dstpt_ma_window":62,
|
|
34
|
+
"use_min_dstpt":False,
|
|
35
|
+
"factor":0.1,
|
|
36
|
+
|
|
37
|
+
"useRatioStats":False,
|
|
38
|
+
"std_weight":1.,
|
|
39
|
+
"useDistancesStats":False,
|
|
40
|
+
"ratio_dcdp_umbral":0.1,
|
|
41
|
+
"dist_umbral":0.5,
|
|
42
|
+
}
|
|
43
|
+
|
|
22
44
|
#### Version 2.3.0
|
|
23
45
|
|
|
24
46
|
- Se agregan funcionalidades.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sarapy"
|
|
7
|
+
version = "3.1.0"
|
|
8
|
+
description = "Library for Sarapico Metadata processing"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Lucas Baldezzari", email = "lmbaldezzari@gmail.com" }
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
license = { text = "MIT" }
|
|
17
|
+
|
|
18
|
+
classifiers = [
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Operating System :: Microsoft :: Windows :: Windows 10",
|
|
22
|
+
"Operating System :: Microsoft :: Windows :: Windows 11",
|
|
23
|
+
"Operating System :: Unix",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
dependencies = [
|
|
27
|
+
"numpy>=1.23",
|
|
28
|
+
"pandas>=1.5",
|
|
29
|
+
"scipy>=1.9",
|
|
30
|
+
"scikit-learn>=1.2",
|
|
31
|
+
"matplotlib>=3.6",
|
|
32
|
+
"seaborn>=0.12",
|
|
33
|
+
"requests>=2.28",
|
|
34
|
+
"python-dotenv>=1.0",
|
|
35
|
+
"geopy>=2.3",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
dev = [
|
|
40
|
+
"pytest",
|
|
41
|
+
"black",
|
|
42
|
+
"ruff",
|
|
43
|
+
"mypy",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[tool.setuptools]
|
|
47
|
+
packages = { find = {} }
|
|
@@ -17,8 +17,8 @@ import re
|
|
|
17
17
|
from datetime import datetime, time
|
|
18
18
|
|
|
19
19
|
class FeaturesResume():
|
|
20
|
-
def __init__(self, raw_data, info="", filtrar=None, updateTagSeedling=False,
|
|
21
|
-
kwargs_fmcreator=None, kwargs_classifier=None, timeFilter=None):
|
|
20
|
+
def __init__(self, raw_data, info="", filtrar=None, updateTagSeedling=False, outliers=None,
|
|
21
|
+
kwargs_fmcreator=None, kwargs_classifier=None, timeFilter=None, window_size_ma=104):
|
|
22
22
|
"""
|
|
23
23
|
Constructor para inicializar la clase FeaturesResume.
|
|
24
24
|
|
|
@@ -29,42 +29,62 @@ class FeaturesResume():
|
|
|
29
29
|
self.updateTagSeedling = updateTagSeedling
|
|
30
30
|
self.filtrar = filtrar
|
|
31
31
|
self.timeFilter = timeFilter
|
|
32
|
+
self.outliers = outliers
|
|
33
|
+
self.window_size_ma = window_size_ma
|
|
32
34
|
|
|
33
35
|
self.info = info
|
|
34
36
|
if not kwargs_fmcreator:
|
|
35
|
-
self.kwargs_fmcreator = {"imputeDistances":
|
|
36
|
-
"dist_mismo_lugar":0.
|
|
37
|
-
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
37
|
+
self.kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
38
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
39
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
40
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.8,
|
|
41
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
38
42
|
else:
|
|
39
43
|
self.kwargs_fmcreator = kwargs_fmcreator
|
|
40
44
|
|
|
41
45
|
if not kwargs_classifier:
|
|
42
|
-
self.kwargs_classifier = {"proba_threshold":0.
|
|
43
|
-
"
|
|
44
|
-
"
|
|
46
|
+
self.kwargs_classifier = {"proba_threshold":0.2,
|
|
47
|
+
"use_proba_ma":False,
|
|
48
|
+
"proba_ma_window":10,
|
|
49
|
+
"update_samePlace":True,
|
|
50
|
+
"update_dstpt":True,
|
|
45
51
|
"useRatioStats":False,
|
|
46
52
|
"std_weight":1.,
|
|
47
53
|
"useDistancesStats":False,
|
|
48
|
-
"ratio_dcdp_umbral":0.
|
|
54
|
+
"ratio_dcdp_umbral":0.0,
|
|
49
55
|
"dist_umbral":0.5,
|
|
50
56
|
"umbral_bajo_dstpt":4,
|
|
51
|
-
"umbral_proba_dstpt":0.
|
|
57
|
+
"umbral_proba_dstpt":0.70,
|
|
58
|
+
"use_ma":True,
|
|
59
|
+
"dstpt_ma_window":104,
|
|
60
|
+
"use_min_dstpt":False,
|
|
61
|
+
"factor":0.1}
|
|
52
62
|
else:
|
|
53
63
|
self.kwargs_classifier = kwargs_classifier
|
|
54
64
|
|
|
55
65
|
if timeFilter:
|
|
56
66
|
self.raw_data = self.filter_raw_by_time_window(**timeFilter)
|
|
57
67
|
|
|
58
|
-
self.plantinFMCreator = PlantinFMCreator(self.kwargs_fmcreator)
|
|
68
|
+
self.plantinFMCreator = PlantinFMCreator(**self.kwargs_fmcreator)
|
|
59
69
|
self.tid = TransformInputData()
|
|
60
70
|
self.data = self.transformRawData(self.raw_data)
|
|
61
|
-
|
|
71
|
+
|
|
72
|
+
if self.filtrar == 1:
|
|
62
73
|
self.data = self.data[self.data["tag_seedling"] == 1]
|
|
63
|
-
elif filtrar == 0:
|
|
74
|
+
elif self.filtrar == 0:
|
|
64
75
|
self.data = self.data[self.data["tag_seedling"] == 0]
|
|
65
76
|
|
|
66
77
|
if "dst_pt" in self.data.columns:
|
|
67
|
-
self.data["
|
|
78
|
+
if len(self.data["dst_pt"]) < window_size_ma:
|
|
79
|
+
self.data["dst_pt_ma"] = self.getSensorMA(window_size=len(self.data["dst_pt"]))
|
|
80
|
+
else:
|
|
81
|
+
self.data["dst_pt_ma"] = self.getSensorMA(window_size=window_size_ma)
|
|
82
|
+
|
|
83
|
+
if "tag_seed_probas1" in self.data.columns:
|
|
84
|
+
if len(self.data["tag_seed_probas1"]) < window_size_ma:
|
|
85
|
+
self.data["tag_seed_probas1_ma"] = self.getProbasMA(window_size=len(self.data["tag_seed_probas1"]))
|
|
86
|
+
else:
|
|
87
|
+
self.data["tag_seed_probas1_ma"] = self.getProbasMA(window_size=window_size_ma)
|
|
68
88
|
|
|
69
89
|
def transformRawData(self, raw_data):
|
|
70
90
|
"""
|
|
@@ -136,6 +156,9 @@ class FeaturesResume():
|
|
|
136
156
|
data["latitud"] = temp_samplesdf["latitud"]
|
|
137
157
|
data["longitud"] = temp_samplesdf["longitud"]
|
|
138
158
|
|
|
159
|
+
if self.outliers:
|
|
160
|
+
data = self.removeOutliers(data.copy(), self.outliers)
|
|
161
|
+
|
|
139
162
|
return data
|
|
140
163
|
|
|
141
164
|
def classifiedData(self, classifier_file = 'modelos\\pipeline_rf.pkl', **kwargs_classifier):
|
|
@@ -143,13 +166,18 @@ class FeaturesResume():
|
|
|
143
166
|
raw_X = self.tid.transform(self.raw_data)
|
|
144
167
|
X, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(raw_X)
|
|
145
168
|
|
|
146
|
-
|
|
169
|
+
# ratio_dcdp_median = np.median(X[:, 1])
|
|
170
|
+
##reemplazo los datos de X[:, 1] por la mediana si están por debajo de -0.8
|
|
171
|
+
# X[:, 1] = np.where(X[:, 1] < -0.8, ratio_dcdp_median, X[:, 1])
|
|
172
|
+
# X[:, 0] = self.getMA(X[:, 0], window_size=26)
|
|
173
|
+
|
|
174
|
+
clasificador = PlantinClassifier(classifier_file=classifier_file)
|
|
147
175
|
|
|
148
|
-
clasificaciones, probas =
|
|
176
|
+
clasificaciones, probas = clasificador.classify(X, dst_pt, inest_pt, **kwargs_classifier)
|
|
149
177
|
|
|
150
178
|
return clasificaciones, probas
|
|
151
179
|
|
|
152
|
-
def removeOutliers(self, limits:dict={"deltaO": (0, 3600),
|
|
180
|
+
def removeOutliers(self, data, limits:dict={"deltaO": (0, 3600),
|
|
153
181
|
"precision": (0, 10000)}):
|
|
154
182
|
"""
|
|
155
183
|
Función para eliminar outliers de las características procesadas.
|
|
@@ -158,14 +186,17 @@ class FeaturesResume():
|
|
|
158
186
|
##chequeo que columnas sí están dentro de self.data y limits.
|
|
159
187
|
##las que no están, se ignoran y se muestra un mensaje de warning
|
|
160
188
|
##actualizo las columnas dentro de limits eliminando las que no están en self.data
|
|
189
|
+
|
|
161
190
|
for col in list(limits.keys()):
|
|
162
|
-
if col not in
|
|
191
|
+
if col not in data.columns:
|
|
163
192
|
logger.warning(f"La columna {col} no está en los datos y será ignorada.")
|
|
164
193
|
del limits[col]
|
|
165
194
|
|
|
166
195
|
##elimino outliers
|
|
167
196
|
for col, (lower, upper) in limits.items():
|
|
168
|
-
|
|
197
|
+
data = data[(data[col] >= lower) & (data[col] <= upper)]
|
|
198
|
+
|
|
199
|
+
return data
|
|
169
200
|
|
|
170
201
|
def getResume(self, to="all", pctbajo_value=1, pctalto_value=14, lista_funciones=None):
|
|
171
202
|
"""
|
|
@@ -226,10 +257,44 @@ class FeaturesResume():
|
|
|
226
257
|
data: numpy array con los datos de la serie temporal
|
|
227
258
|
window_size: tamaño de la ventana para calcular la media móvil
|
|
228
259
|
"""
|
|
229
|
-
return np.convolve(self.data["dst_pt"].values, np.ones(window_size)/window_size, mode=mode)
|
|
260
|
+
# return np.convolve(self.data["dst_pt"].values, np.ones(window_size)/window_size, mode=mode)
|
|
261
|
+
##para evitar ceros al inicio y al final debido a la convolución, agrego padding
|
|
262
|
+
##pongo los primeros window_size valores de la señal al inicio y los últimos window_size valores al final
|
|
263
|
+
padding_start = self.data["dst_pt"].values[0:window_size]
|
|
264
|
+
padding_end = self.data["dst_pt"].values[-window_size:]
|
|
265
|
+
padded_data = np.concatenate([padding_start, self.data["dst_pt"].values, padding_end])
|
|
266
|
+
ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
|
|
267
|
+
return ma_full[window_size: -window_size]
|
|
230
268
|
|
|
231
|
-
def
|
|
232
|
-
|
|
269
|
+
def getProbasMA(self, window_size=104, mode='same'):
|
|
270
|
+
"""
|
|
271
|
+
Función para calcular la media móvil de una serie temporal.
|
|
272
|
+
data: numpy array con los datos de la serie temporal
|
|
273
|
+
window_size: tamaño de la ventana para calcular la media móvil
|
|
274
|
+
"""
|
|
275
|
+
##para evitar ceros al inicio y al final debido a la convolución, agrego padding
|
|
276
|
+
##copio los primeros y últimos valores usando la misma cantidad que window_size
|
|
277
|
+
##pongo los primeros window_size valores de la señal al inicio y los últimos window_size valores al final
|
|
278
|
+
padding_start = self.data["tag_seed_probas1"].values[0:window_size]
|
|
279
|
+
padding_end = self.data["tag_seed_probas1"].values[-window_size:]
|
|
280
|
+
padded_data = np.concatenate([padding_start, self.data["tag_seed_probas1"].values, padding_end])
|
|
281
|
+
ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
|
|
282
|
+
return ma_full[window_size: -window_size]
|
|
283
|
+
|
|
284
|
+
def getMA(self, data: np.array, window_size=104, mode='same'):
|
|
285
|
+
"""
|
|
286
|
+
Función para calcular la media móvil de una serie temporal.
|
|
287
|
+
data: numpy array con los datos de la serie temporal
|
|
288
|
+
window_size: tamaño de la ventana para calcular la media móvil
|
|
289
|
+
"""
|
|
290
|
+
##para evitar ceros al inicio y al final debido a la convolución, agrego padding
|
|
291
|
+
##copio los primeros y últimos valores usando la misma cantidad que window_size
|
|
292
|
+
##pongo los primeros window_size valores de la señal al inicio y los últimos window_size valores al final
|
|
293
|
+
padding_start = data[0:window_size]
|
|
294
|
+
padding_end = data[-window_size:]
|
|
295
|
+
padded_data = np.concatenate([padding_start, data, padding_end])
|
|
296
|
+
ma_full = np.convolve(padded_data, np.ones(window_size)/window_size, mode='same')
|
|
297
|
+
return ma_full[window_size: -window_size]
|
|
233
298
|
|
|
234
299
|
def to_time_obj(self,t):
|
|
235
300
|
"""
|
|
@@ -299,6 +364,13 @@ class FeaturesResume():
|
|
|
299
364
|
mask &= ~tod.eq(t1)
|
|
300
365
|
|
|
301
366
|
filtered = df[mask]
|
|
367
|
+
#me quedo con los indices donde se cumpla df[mask] y aplico a self.raw_data de origen
|
|
368
|
+
|
|
369
|
+
##chequeo que filtered no esté vacío ni tenga menos de 10 filas; en ese caso retorno None
|
|
370
|
+
if filtered.empty or len(filtered) < 10:
|
|
371
|
+
logger.warning("El filtro de tiempo resultó en un conjunto vacío.")
|
|
372
|
+
print("El filtro de tiempo resultó en un conjunto vacío.")
|
|
373
|
+
return None
|
|
302
374
|
|
|
303
375
|
#si inplace, actualizo filtro raw_data y retorno un nuevo objeto FeaturesResume, sino retorno los datos filtrados
|
|
304
376
|
if inplace:
|
|
@@ -312,7 +384,9 @@ class FeaturesResume():
|
|
|
312
384
|
updateTagSeedling = self.updateTagSeedling,
|
|
313
385
|
kwargs_fmcreator = self.kwargs_fmcreator,
|
|
314
386
|
kwargs_classifier = self.kwargs_classifier,
|
|
315
|
-
timeFilter = None # ya apliqué el filtro
|
|
387
|
+
timeFilter = None, # ya apliqué el filtro
|
|
388
|
+
outliers = self.outliers,
|
|
389
|
+
window_size_ma=self.window_size_ma,
|
|
316
390
|
)
|
|
317
391
|
|
|
318
392
|
return new_fr
|
|
@@ -374,7 +448,7 @@ class FeaturesResume():
|
|
|
374
448
|
):
|
|
375
449
|
"""
|
|
376
450
|
Genera un gráfico de comparación entre dos características en ejes y diferentes.
|
|
377
|
-
|
|
451
|
+
Se puede elegir si cada eje usa línea, solo marcadores, o ambos.
|
|
378
452
|
|
|
379
453
|
Args:
|
|
380
454
|
- feature1, feature2: nombres de columnas en self.data.
|
|
@@ -442,15 +516,16 @@ class FeaturesResume():
|
|
|
442
516
|
lines2, labels2 = ax2.get_legend_handles_labels()
|
|
443
517
|
ax1.legend(lines1 + lines2, labels1 + labels2, loc='best')
|
|
444
518
|
|
|
445
|
-
if show:
|
|
446
|
-
plt.show()
|
|
447
|
-
|
|
448
519
|
if save:
|
|
449
520
|
if filename is not None:
|
|
450
521
|
plt.savefig(filename)
|
|
451
522
|
else:
|
|
452
523
|
plt.savefig(f"feature_comparison_{feature1}_{feature2}.png")
|
|
453
|
-
|
|
524
|
+
|
|
525
|
+
if show:
|
|
526
|
+
plt.show()
|
|
527
|
+
else:
|
|
528
|
+
plt.close(fig) # Cierra la figura para liberar memoria
|
|
454
529
|
|
|
455
530
|
##gráfico de dispersión para comparar la distribución de 0s y 1s
|
|
456
531
|
def plot_geo_compare(
|
|
@@ -464,6 +539,9 @@ class FeaturesResume():
|
|
|
464
539
|
s: float = 10.0,
|
|
465
540
|
alpha: float = 0.8,
|
|
466
541
|
equal_aspect: bool = True,
|
|
542
|
+
save = False,
|
|
543
|
+
show = True,
|
|
544
|
+
filename = None,
|
|
467
545
|
# ---- NUEVO: control de colorbar y límites de color ----
|
|
468
546
|
vmin: float | None = None,
|
|
469
547
|
vmax: float | None = None,
|
|
@@ -541,7 +619,15 @@ class FeaturesResume():
|
|
|
541
619
|
ax.set_xlim(xmin, xmax)
|
|
542
620
|
ax.set_ylim(ymin, ymax)
|
|
543
621
|
|
|
544
|
-
|
|
622
|
+
|
|
623
|
+
if save:
|
|
624
|
+
if filename is not None:
|
|
625
|
+
plt.savefig(filename)
|
|
626
|
+
else:
|
|
627
|
+
plt.savefig(f"geo_compare_{feature_col}.png")
|
|
628
|
+
if show:
|
|
629
|
+
plt.show()
|
|
630
|
+
plt.close(fig) # Cierra la figura para liberar memoria
|
|
545
631
|
|
|
546
632
|
if __name__ == "__main__":
|
|
547
633
|
import json
|
|
@@ -554,20 +640,33 @@ if __name__ == "__main__":
|
|
|
554
640
|
pkg_logger = logging.getLogger("sarapy.stats")
|
|
555
641
|
pkg_logger.setLevel(logging.ERROR)
|
|
556
642
|
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
643
|
+
## argumentos de PlantinFMCreator
|
|
644
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
645
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
646
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
647
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
648
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
##argumentos del método PlantinClassifier.classify()
|
|
652
|
+
kwargs_classifier = {"proba_threshold":0.45,
|
|
653
|
+
"use_proba_ma":False,
|
|
654
|
+
"proba_ma_window":10,
|
|
655
|
+
"update_samePlace":True,
|
|
656
|
+
"update_dstpt":True,
|
|
657
|
+
"umbral_proba_dstpt":0.5,
|
|
658
|
+
"umbral_bajo_dstpt":1.5,
|
|
659
|
+
"use_ma":True,
|
|
660
|
+
"dstpt_ma_window":62,
|
|
661
|
+
"use_min_dstpt":False,
|
|
662
|
+
"factor":0.1,
|
|
663
|
+
|
|
564
664
|
"useRatioStats":False,
|
|
565
665
|
"std_weight":1.,
|
|
566
666
|
"useDistancesStats":False,
|
|
567
|
-
"ratio_dcdp_umbral":0.
|
|
667
|
+
"ratio_dcdp_umbral":0.1,
|
|
568
668
|
"dist_umbral":0.5,
|
|
569
|
-
|
|
570
|
-
"umbral_proba_dstpt":0.7}
|
|
669
|
+
}
|
|
571
670
|
|
|
572
671
|
|
|
573
672
|
time_filter=None
|
|
@@ -590,14 +689,19 @@ if __name__ == "__main__":
|
|
|
590
689
|
|
|
591
690
|
merged_data = dataMerging(historical_data, post_data, raw_data, nodoName=nodo,newColumns=False, asDF=False)
|
|
592
691
|
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
692
|
+
outliers = {
|
|
693
|
+
"ratio_dCdP": (-5, 2),
|
|
694
|
+
"deltaO": (0, 3600),
|
|
695
|
+
"time_ac": (0, 100),
|
|
696
|
+
"precision": (0, 5000),
|
|
697
|
+
"distances": (0, 100)
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
fr = FeaturesResume(merged_data, info = nodo, filtrar=None, outliers=outliers,
|
|
701
|
+
kwargs_classifier=kwargs_classifier,
|
|
702
|
+
kwargs_fmcreator=kwargs_fmcreator,
|
|
703
|
+
updateTagSeedling=True, timeFilter=None,
|
|
704
|
+
window_size_ma=62)
|
|
601
705
|
|
|
602
706
|
print(fr.data["tag_seedling"].value_counts(normalize=True))
|
|
603
707
|
print(fr.getResume(to="all"))
|
|
@@ -32,8 +32,8 @@ class OpsProcessor():
|
|
|
32
32
|
self.classifications_probas = None
|
|
33
33
|
plclass_map = {"classifier_file"}
|
|
34
34
|
self._operationsDict = {} ##diccionario de operarios con sus operaciones
|
|
35
|
-
self._platin_classifiedOperations = np.array([]) ##array con las operaciones clasificadas para plantin
|
|
36
|
-
self._fertilizer_classifiedOperations = np.array([]) ##array con las operaciones clasificadas para plantin
|
|
35
|
+
# self._platin_classifiedOperations = np.array([]) ##array con las operaciones clasificadas para plantin
|
|
36
|
+
# self._fertilizer_classifiedOperations = np.array([]) ##array con las operaciones clasificadas para plantin
|
|
37
37
|
self._last_row_db = 0 ##indicador de la última fila de los datos extraidos de la base de datos histórica
|
|
38
38
|
|
|
39
39
|
kwargs_plclass = {}
|
|
@@ -43,7 +43,8 @@ class OpsProcessor():
|
|
|
43
43
|
kwargs_plclass[key] = value
|
|
44
44
|
|
|
45
45
|
fmcreator_map = {"imputeDistances", "distanciaMedia", "umbral_precision",
|
|
46
|
-
"dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio"
|
|
46
|
+
"dist_mismo_lugar", "max_dist", "umbral_ratio_dCdP", "deltaO_medio",
|
|
47
|
+
"impute_ratiodcdp", "umbral_impute_ratiodcdp", "deltaO_ma", "deltaO_ma_window"}
|
|
47
48
|
fmcreator_kargs = {}
|
|
48
49
|
##recorro kwargs y usando fmcreator_map creo un nuevo diccionario con los valores que se pasaron
|
|
49
50
|
for key, value in kwargs.items():
|
|
@@ -188,7 +189,7 @@ class OpsProcessor():
|
|
|
188
189
|
logging.debug(f"Número de operaciones para el nodo {ID_NPDP}: {len(operations)}")
|
|
189
190
|
features, dst_pt, inest_pt = self.plantinFMCreator.fit_transform(operations)
|
|
190
191
|
logging.debug(f"Features shape for {ID_NPDP}: {features.shape}")
|
|
191
|
-
classified_ops, classifications_probas = self._plantin_classifier.classify(features, dst_pt, inest_pt, **
|
|
192
|
+
classified_ops, classifications_probas = self._plantin_classifier.classify(features, dst_pt, inest_pt, **kwargs)
|
|
192
193
|
logging.debug(f"Classified operations shape for {ID_NPDP}: {classified_ops.shape}")
|
|
193
194
|
|
|
194
195
|
##chequeo si first_day_op_classified es True, si es así, no se considera la primera fila de las classified_ops
|
|
@@ -306,37 +307,51 @@ if __name__ == "__main__":
|
|
|
306
307
|
import logging
|
|
307
308
|
|
|
308
309
|
## argumentos de PlantinFMCreator
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
310
|
+
kwargs_fmcreator = {"imputeDistances":True, "distanciaMedia":1.8, "umbral_precision":0.3,
|
|
311
|
+
"dist_mismo_lugar":0.2, "max_dist":100,
|
|
312
|
+
"umbral_ratio_dCdP":2, "deltaO_medio":4,
|
|
313
|
+
"impute_ratiodcdp": True, "umbral_impute_ratiodcdp": -0.5,
|
|
314
|
+
"deltaO_ma": True, "deltaO_ma_window": 26}
|
|
312
315
|
|
|
313
316
|
|
|
314
|
-
|
|
315
|
-
kwargs_classifier = {"proba_threshold":0.
|
|
316
|
-
"
|
|
317
|
-
"
|
|
317
|
+
##argumentos del método PlantinClassifier.classify()
|
|
318
|
+
kwargs_classifier = {"proba_threshold":0.4,
|
|
319
|
+
"use_proba_ma":False,
|
|
320
|
+
"proba_ma_window":10,
|
|
321
|
+
"update_samePlace":True,
|
|
322
|
+
"update_dstpt":True,
|
|
323
|
+
"umbral_proba_dstpt":0.5,
|
|
324
|
+
"umbral_bajo_dstpt":1.5,
|
|
325
|
+
"use_ma":True,
|
|
326
|
+
"dstpt_ma_window":62,
|
|
327
|
+
"use_min_dstpt":False,
|
|
328
|
+
"factor":0.1,
|
|
329
|
+
|
|
318
330
|
"useRatioStats":False,
|
|
319
331
|
"std_weight":1.,
|
|
320
332
|
"useDistancesStats":False,
|
|
321
|
-
"ratio_dcdp_umbral":0.
|
|
333
|
+
"ratio_dcdp_umbral":0.1,
|
|
322
334
|
"dist_umbral":0.5,
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
nodos = ['UPM006N','UPM007N','UPM034N','UPM037N','UPM038N','UPM039N','UPM045N','UPM041N',
|
|
338
|
+
'UPM048N','UPM105N','UPM107N']
|
|
339
|
+
for nodo in nodos:
|
|
340
|
+
print(f"**************** Procesando nodo: {nodo} ***********************")
|
|
341
|
+
historical_data_path = f"examples\\2025-08-09\\{nodo}\\historical-data.json"
|
|
342
|
+
with open(historical_data_path, 'r') as file:
|
|
343
|
+
samples = json.load(file)
|
|
344
|
+
|
|
345
|
+
op = OpsProcessor(classifier_file='modelos\\pipeline_rf.pkl',
|
|
346
|
+
# regresor_file='modelos\\regresor.pkl', poly_features_file='modelos\\poly_features.pkl',
|
|
347
|
+
regresor_file='modelos\\regfresor_v2.pkl', poly_features_file='modelos\\poly_features_v2.pkl',
|
|
348
|
+
**kwargs_fmcreator)
|
|
349
|
+
|
|
350
|
+
ops_clasificadas = op.processOperations(samples, **kwargs_classifier)
|
|
351
|
+
probas = op.classifications_probas
|
|
352
|
+
# print(probas[:3])
|
|
353
|
+
# print(ops_clasificadas[:3])
|
|
354
|
+
df_ops_clasificadas = pd.DataFrame(ops_clasificadas)
|
|
355
|
+
|
|
356
|
+
print(df_ops_clasificadas.describe())
|
|
357
|
+
print(f"***************************************************************")
|