edb-noumea 0.2.11__tar.gz → 0.2.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/PKG-INFO +1 -1
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea/details.py +33 -13
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea.egg-info/PKG-INFO +1 -1
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/pyproject.toml +1 -1
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/README.md +0 -0
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea/__init__.py +0 -0
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea/main.py +0 -0
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea.egg-info/SOURCES.txt +0 -0
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea.egg-info/dependency_links.txt +0 -0
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea.egg-info/requires.txt +0 -0
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/edb_noumea.egg-info/top_level.txt +0 -0
- {edb_noumea-0.2.11 → edb_noumea-0.2.13}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: edb-noumea
|
|
3
|
-
Version: 0.2.11
|
|
3
|
+
Version: 0.2.13
|
|
4
4
|
Summary: Un scraper pour la qualité des eaux de baignade à Nouméa.
|
|
5
5
|
Project-URL: Homepage, https://github.com/adriens/edb-noumea
|
|
6
6
|
Project-URL: Repository, https://github.com/adriens/edb-noumea
|
|
@@ -96,8 +96,19 @@ def get_detailed_results():
|
|
|
96
96
|
print("\nColonnes:", list(df.columns))
|
|
97
97
|
print("Shape:", df.shape)
|
|
98
98
|
|
|
99
|
-
#
|
|
100
|
-
|
|
99
|
+
# Sélection dynamique des colonnes bactéries par nom
|
|
100
|
+
# Recherche des colonnes contenant les mots-clés
|
|
101
|
+
e_coli_col = next((col for col in df.columns if "Escherichia" in str(col) or "coli" in str(col)), None)
|
|
102
|
+
entero_col = next((col for col in df.columns if "Entérocoques" in str(col)), None)
|
|
103
|
+
|
|
104
|
+
if e_coli_col is None or entero_col is None:
|
|
105
|
+
print(f"❌ Colonnes bactéries non trouvées dans le tableau extrait. Colonnes disponibles : {list(df.columns)}")
|
|
106
|
+
return None
|
|
107
|
+
|
|
108
|
+
# Sélectionne les 4 premières colonnes + colonnes bactéries trouvées
|
|
109
|
+
selected_cols = [df.columns[0], df.columns[1], df.columns[2], df.columns[4], e_coli_col, entero_col]
|
|
110
|
+
cleaned_df = df.loc[:, selected_cols].copy()
|
|
111
|
+
cleaned_df.columns = [
|
|
101
112
|
"site",
|
|
102
113
|
"point_de_prelevement",
|
|
103
114
|
"date",
|
|
@@ -105,8 +116,6 @@ def get_detailed_results():
|
|
|
105
116
|
"e_coli_npp_100ml",
|
|
106
117
|
"enterocoques_npp_100ml"
|
|
107
118
|
]
|
|
108
|
-
cleaned_df = df.iloc[:, :6].copy()
|
|
109
|
-
cleaned_df.columns = expected_columns
|
|
110
119
|
|
|
111
120
|
# Ajoute deux colonnes issues du split de 'point_de_prelevement'
|
|
112
121
|
split_points = cleaned_df["point_de_prelevement"].str.split(",", n=1, expand=True)
|
|
@@ -117,11 +126,19 @@ def get_detailed_results():
|
|
|
117
126
|
if "heure" in cleaned_df.columns:
|
|
118
127
|
cleaned_df["heure"] = cleaned_df["heure"].astype(str)
|
|
119
128
|
|
|
129
|
+
|
|
120
130
|
# Nettoyer et convertir les colonnes e_coli_npp_100ml et enterocoques_npp_100ml
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
131
|
+
if "e_coli_npp_100ml" in cleaned_df.columns:
|
|
132
|
+
cleaned_df["e_coli_npp_100ml"] = cleaned_df["e_coli_npp_100ml"].astype(str).str.replace(r"<\s*10", "10", regex=True)
|
|
133
|
+
cleaned_df["e_coli_npp_100ml"] = pd.to_numeric(cleaned_df["e_coli_npp_100ml"], errors="coerce").astype('Int64')
|
|
134
|
+
|
|
135
|
+
if "enterocoques_npp_100ml" in cleaned_df.columns:
|
|
136
|
+
cleaned_df["enterocoques_npp_100ml"] = cleaned_df["enterocoques_npp_100ml"].astype(str).str.replace(r"<\s*10", "10", regex=True)
|
|
137
|
+
cleaned_df["enterocoques_npp_100ml"] = pd.to_numeric(cleaned_df["enterocoques_npp_100ml"], errors="coerce").astype('Int64')
|
|
138
|
+
|
|
139
|
+
# Convertir la colonne 'date' en datetime (format jour/mois/année)
|
|
140
|
+
if "date" in cleaned_df.columns:
|
|
141
|
+
cleaned_df["date"] = pd.to_datetime(cleaned_df["date"], format="%d/%m/%Y", errors="coerce")
|
|
125
142
|
|
|
126
143
|
return cleaned_df
|
|
127
144
|
|
|
@@ -129,9 +146,12 @@ if __name__ == "__main__":
|
|
|
129
146
|
# Obtenir le DataFrame des résultats détaillés
|
|
130
147
|
detailed_df = get_detailed_results()
|
|
131
148
|
|
|
132
|
-
# Afficher
|
|
149
|
+
# Afficher seulement les colonnes demandées
|
|
133
150
|
if detailed_df is not None:
|
|
134
|
-
print("\n📋
|
|
135
|
-
print(detailed_df
|
|
136
|
-
|
|
137
|
-
|
|
151
|
+
print("\n📋 Détails synthétiques :")
|
|
152
|
+
print(detailed_df[[
|
|
153
|
+
"point_de_prelevement",
|
|
154
|
+
"date",
|
|
155
|
+
"e_coli_npp_100ml",
|
|
156
|
+
"enterocoques_npp_100ml"
|
|
157
|
+
]])
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: edb-noumea
|
|
3
|
-
Version: 0.2.11
|
|
3
|
+
Version: 0.2.13
|
|
4
4
|
Summary: Un scraper pour la qualité des eaux de baignade à Nouméa.
|
|
5
5
|
Project-URL: Homepage, https://github.com/adriens/edb-noumea
|
|
6
6
|
Project-URL: Repository, https://github.com/adriens/edb-noumea
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|