colombia-hydrodata 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,307 @@
1
+ Metadata-Version: 2.4
2
+ Name: colombia-hydrodata
3
+ Version: 0.1.0
4
+ Summary: Python client to access hydrological and meteorological data from Colombia
5
+ Author: Sebástian Narváez
6
+ Author-email: sebnarvaez19@outlook.com
7
+ Requires-Python: >=3.14
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.14
10
+ Requires-Dist: aquarius-webportal (>=0.4.1,<1.0.0)
11
+ Requires-Dist: geopandas (>=1.1.3,<2.0.0)
12
+ Requires-Dist: pandas (>=3.0.1,<4.0.0)
13
+ Requires-Dist: platformdirs (>=4.9.4,<5.0.0)
14
+ Requires-Dist: pyarrow (>=23.0.1,<24.0.0)
15
+ Requires-Dist: requests (>=2.32.5,<3.0.0)
16
+ Requires-Dist: shapely (>=2.1.2,<3.0.0)
17
+ Description-Content-Type: text/markdown
18
+
19
+ # Colombia Hydrodata
20
+
21
+ Python client to access **hydrological and meteorological data from Colombia**.
22
+
23
+ The library integrates multiple official data sources and exposes them through a simple and Pythonic API.
24
+
25
+ Data is fetched from:
26
+
27
+ - [Catálogo Nacional de Estaciones – Datos Abiertos Colombia](https://datos.gov.co/Ambiente-y-Desarrollo-Sostenible/Cat-logo-Nacional-de-Estaciones-del-IDEAM/hp9r-jxuu)
28
+ - [AQUARIUS WebPortal – IDEAM](http://aquariuswebportal.ideam.gov.co/)
29
+
30
+ ---
31
+
32
+ ## Architecture
33
+
34
+ The library follows a **client → station → dataset** hierarchy.
35
+
36
+ ```
37
+ Client
38
+ └── Station
39
+ └── Dataset
40
+ ```
41
+
42
+ - **`Client`** loads the station catalog and exposes methods to query and filter stations.
43
+ - **`Station`** represents an IDEAM monitoring station with its full metadata and available variables.
44
+ - **`Dataset`** represents a time series retrieved from the Aquarius WebPortal.
45
+
46
+ ---
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ pip install colombia-hydrodata
52
+ ```
53
+
54
+ Or with Poetry:
55
+
56
+ ```bash
57
+ poetry add colombia-hydrodata
58
+ ```
59
+
60
+ ---
61
+
62
+ ## Quick Start
63
+
64
+ ```python
65
+ from colombia_hydrodata.client import Client
66
+
67
+ client = Client()
68
+
69
+ # Fetch a single station
70
+ station = client.fetch_station("29037020")
71
+ print(station)
72
+
73
+ # Fetch its discharge time series
74
+ dataset = station["CAUDAL@HIS_Q_MEDIA_D"]
75
+ print(dataset.data)
76
+ ```
77
+
78
+ ---
79
+
80
+ ## Client
81
+
82
+ `Client` is the main entry point. On initialization, it downloads the full IDEAM station catalog from Datos Abiertos Colombia and stores it internally as a **GeoDataFrame**.
83
+
84
+ ```python
85
+ from colombia_hydrodata.client import Client
86
+
87
+ client = Client()
88
+ ```
89
+
90
+ ---
91
+
92
+ ## Fetching Stations
93
+
94
+ ### Single station
95
+
96
+ ```python
97
+ station = client.fetch_station("29037020")
98
+ ```
99
+
100
+ Returns a `Station` object with full metadata.
101
+
102
+ ### Multiple stations by ID
103
+
104
+ ```python
105
+ stations = client.fetch_stations(["29037020", "29037021"])
106
+ ```
107
+
108
+ Returns a list of `Station` objects.
109
+
110
+ ---
111
+
112
+ ## Spatial Queries
113
+
114
+ ### Bounding box
115
+
116
+ ```python
117
+ stations = client.fetch_bbox(
118
+ xmin=-75.0,
119
+ ymin=9.5,
120
+ xmax=-74.0,
121
+ ymax=10.5
122
+ )
123
+ ```
124
+
125
+ ### Shapely geometry
126
+
127
+ ```python
128
+ from shapely.geometry import Polygon
129
+
130
+ region = Polygon(...)
131
+ stations = client.fetch_region(region)
132
+ ```
133
+
134
+ ### Catalog only (no Station objects)
135
+
136
+ To get a lightweight GeoDataFrame of station metadata without instantiating `Station` objects:
137
+
138
+ ```python
139
+ gdf = client.stations_in_region(region)
140
+ gdf = client.stations_in_list(["29037020", "29037021"])
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Filtering Stations
146
+
147
+ Use the `Filters` dataclass to narrow results by metadata fields. Every field is optional, and the fields you set are combined with a logical **AND**.
148
+
149
+ ```python
150
+ from colombia_hydrodata.client import Client
151
+ from colombia_hydrodata.filters import Filters
152
+
153
+ client = Client()
154
+
155
+ filters = Filters(
156
+ category="Limnimétrica",
157
+ department="Bolivar",
158
+ status="Activa",
159
+ )
160
+
161
+ stations = client.fetch_region(region, filters=filters)  # `region` is a Shapely geometry, built as in the "Spatial Queries" example
162
+ ```
163
+
164
+ Available filter fields:
165
+
166
+ | Field | Description |
167
+ |------------------------|--------------------------|
168
+ | `category` | Station category |
169
+ | `department` | Department |
170
+ | `municipality` | Municipality |
171
+ | `status` | Operational status |
172
+ | `owner` | Owning institution |
173
+ | `hydrographic_area` | Hydrographic area |
174
+ | `hydrographic_zone` | Hydrographic zone |
175
+ | `hydrographic_subzone` | Hydrographic subzone |
176
+
177
+ ---
178
+
179
+ ## Station
180
+
181
+ A `Station` is a **frozen dataclass** (read-only) with full metadata about an IDEAM monitoring station.
182
+
183
+ ```python
184
+ station = client.fetch_station("29037020")
185
+ print(station)
186
+ ```
187
+
188
+ Example output:
189
+
190
+ ```
191
+ Station CALAMAR: 29037020
192
+ Calamar (Bolivar)
193
+ Info: Activa Limnimétrica (Convencional)
194
+ Time: 1940-07-15 00:00:00 - ongoing
195
+ Owner: INSTITUTO DE HIDROLOGÍA METEOROLOGÍA Y ESTUDIOS AMBIENTALES
196
+ Location: altitude=8.00 [-74.915; 10.243]
197
+ Hydrographic: area=Magdalena Cauca zone=Bajo Magdalena subzone=Canal del Dique margen izquierda
198
+ Variables:
199
+ CAUDAL:
200
+ HIS_Q_MEDIA_D, HIS_Q_MX_M, HIS_Q_MEDIA_M, Q_MN_D, Q_MN_M, Q_MX_D, Q_MX_M, Q_MN_A, Q_MX_A, Q_MEDIA_A, CAUDAL_H
201
+ NIVEL:
202
+ NVLM_CON, HIS_NV_MEDIA_D, HIS_NV_MN_M, HIS_NV_MX_M, NIVEL_H, NV_MEDIA_D, NV_MN_D, NV_MN_M, NV_MX_D, NV_MX_M, NV_MN_A, NV_MX_A, NV_MEDIA_A, HIS_NIVEL_H
203
+ TM:
204
+ HIS_TR_QS_M, HIS_TR_QS_TT_M, HIS_TR_QS_MX_M, HIS_TR_KT/D_QS_D
205
+ ```
206
+
207
+ ### Station attributes
208
+
209
+ | Attribute | Description |
210
+ |---------------------|------------------------------------|
211
+ | `id` | Station code |
212
+ | `name` | Station name |
213
+ | `category` | Category (e.g. Limnimétrica) |
214
+ | `technology` | Technology (e.g. Convencional) |
215
+ | `status` | Operational status |
216
+ | `department` | Department |
217
+ | `municipality` | Municipality |
218
+ | `installation_date` | Date the station was installed |
219
+ | `suspension_date` | Date suspended (`None` if active) |
220
+ | `owner` | Owning institution |
221
+ | `location` | `Location` (altitude, lon, lat) |
222
+ | `hydrographic` | `Hydrographic` (area, zone, subzone)|
223
+ | `variables` | Dict of available `Variable` objects|
224
+
225
+ ### Checking variable availability
226
+
227
+ ```python
228
+ "CAUDAL@HIS_Q_MEDIA_D" in station # True / False
229
+ ```
230
+
231
+ ---
232
+
233
+ ## Fetching Data
234
+
235
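+ Variables are identified by a key in the format `PARAM@LABEL`, where `PARAM` is a parameter heading (e.g. `CAUDAL`) and `LABEL` is one of the labels listed under it when you print a station (e.g. `HIS_Q_MEDIA_D`), giving `CAUDAL@HIS_Q_MEDIA_D`.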
236
+
237
+ ### Using `fetch()`
238
+
239
+ ```python
240
+ dataset = station.fetch("CAUDAL@HIS_Q_MEDIA_D")
241
+ ```
242
+
243
+ ### Using `[]` (shorthand)
244
+
245
+ ```python
246
+ dataset = station["CAUDAL@HIS_Q_MEDIA_D"]
247
+ ```
248
+
249
+ Both return a `Dataset` object.
250
+
251
+ ---
252
+
253
+ ## Dataset
254
+
255
+ A `Dataset` represents a **time series associated with a station and a variable**.
256
+
257
+ ```python
258
+ dataset = station["NIVEL@NV_MEDIA_D"]
259
+
260
+ print(dataset.station) # Station object
261
+ print(dataset.variable) # Variable object (param, label, id)
262
+ print(dataset.data) # pandas DataFrame
263
+ ```
264
+
265
+ The `data` DataFrame has two columns:
266
+
267
+ | Column | Description |
268
+ |-------------|---------------------|
269
+ | `timestamp` | datetime |
270
+ | `value` | numeric measurement |
271
+
272
+ Example:
273
+
274
+ ```
275
+ timestamp value
276
+ 0 2025-01-01 2.31
277
+ 1 2025-01-02 2.28
278
+ 2 2025-01-03 2.35
279
+ ```
280
+
281
+ ---
282
+
283
+ ## Data Sources
284
+
285
+ ### Station Catalog
286
+
287
+ Fetched from the **Socrata SODA API** via Datos Abiertos Colombia:
288
+
289
+ ```
290
+ https://datos.gov.co/resource/hp9r-jxuu.json
291
+ ```
292
+
293
+ ### Time Series
294
+
295
+ Fetched from the **IDEAM Aquarius WebPortal**:
296
+
297
+ ```
298
+ http://aquariuswebportal.ideam.gov.co/
299
+ ```
300
+
301
+ Each variable key (`PARAM@LABEL`) corresponds to a unique Aquarius dataset ID used internally to retrieve the time series.
302
+
303
+ ---
304
+
305
+ *README made with [GitHub Copilot](https://github.com/features/copilot).*
306
+
307
+
@@ -0,0 +1,288 @@
1
+ # Colombia Hydrodata
2
+
3
+ Python client to access **hydrological and meteorological data from Colombia**.
4
+
5
+ The library integrates multiple official data sources and exposes them through a simple and Pythonic API.
6
+
7
+ Data is fetched from:
8
+
9
+ - [Catálogo Nacional de Estaciones – Datos Abiertos Colombia](https://datos.gov.co/Ambiente-y-Desarrollo-Sostenible/Cat-logo-Nacional-de-Estaciones-del-IDEAM/hp9r-jxuu)
10
+ - [AQUARIUS WebPortal – IDEAM](http://aquariuswebportal.ideam.gov.co/)
11
+
12
+ ---
13
+
14
+ ## Architecture
15
+
16
+ The library follows a **client → station → dataset** hierarchy.
17
+
18
+ ```
19
+ Client
20
+ └── Station
21
+ └── Dataset
22
+ ```
23
+
24
+ - **`Client`** loads the station catalog and exposes methods to query and filter stations.
25
+ - **`Station`** represents an IDEAM monitoring station with its full metadata and available variables.
26
+ - **`Dataset`** represents a time series retrieved from the Aquarius WebPortal.
27
+
28
+ ---
29
+
30
+ ## Installation
31
+
32
+ ```bash
33
+ pip install colombia-hydrodata
34
+ ```
35
+
36
+ Or with Poetry:
37
+
38
+ ```bash
39
+ poetry add colombia-hydrodata
40
+ ```
41
+
42
+ ---
43
+
44
+ ## Quick Start
45
+
46
+ ```python
47
+ from colombia_hydrodata.client import Client
48
+
49
+ client = Client()
50
+
51
+ # Fetch a single station
52
+ station = client.fetch_station("29037020")
53
+ print(station)
54
+
55
+ # Fetch its discharge time series
56
+ dataset = station["CAUDAL@HIS_Q_MEDIA_D"]
57
+ print(dataset.data)
58
+ ```
59
+
60
+ ---
61
+
62
+ ## Client
63
+
64
+ `Client` is the main entry point. On initialization, it downloads the full IDEAM station catalog from Datos Abiertos Colombia and stores it internally as a **GeoDataFrame**.
65
+
66
+ ```python
67
+ from colombia_hydrodata.client import Client
68
+
69
+ client = Client()
70
+ ```
71
+
72
+ ---
73
+
74
+ ## Fetching Stations
75
+
76
+ ### Single station
77
+
78
+ ```python
79
+ station = client.fetch_station("29037020")
80
+ ```
81
+
82
+ Returns a `Station` object with full metadata.
83
+
84
+ ### Multiple stations by ID
85
+
86
+ ```python
87
+ stations = client.fetch_stations(["29037020", "29037021"])
88
+ ```
89
+
90
+ Returns a list of `Station` objects.
91
+
92
+ ---
93
+
94
+ ## Spatial Queries
95
+
96
+ ### Bounding box
97
+
98
+ ```python
99
+ stations = client.fetch_bbox(
100
+ xmin=-75.0,
101
+ ymin=9.5,
102
+ xmax=-74.0,
103
+ ymax=10.5
104
+ )
105
+ ```
106
+
107
+ ### Shapely geometry
108
+
109
+ ```python
110
+ from shapely.geometry import Polygon
111
+
112
+ region = Polygon(...)
113
+ stations = client.fetch_region(region)
114
+ ```
115
+
116
+ ### Catalog only (no Station objects)
117
+
118
+ To get a lightweight GeoDataFrame of station metadata without instantiating `Station` objects:
119
+
120
+ ```python
121
+ gdf = client.stations_in_region(region)
122
+ gdf = client.stations_in_list(["29037020", "29037021"])
123
+ ```
124
+
125
+ ---
126
+
127
+ ## Filtering Stations
128
+
129
+ Use the `Filters` dataclass to narrow results by metadata fields. Every field is optional, and the fields you set are combined with a logical **AND**.
130
+
131
+ ```python
132
+ from colombia_hydrodata.client import Client
133
+ from colombia_hydrodata.filters import Filters
134
+
135
+ client = Client()
136
+
137
+ filters = Filters(
138
+ category="Limnimétrica",
139
+ department="Bolivar",
140
+ status="Activa",
141
+ )
142
+
143
+ stations = client.fetch_region(region, filters=filters)  # `region` is a Shapely geometry, built as in the "Spatial Queries" example
144
+ ```
145
+
146
+ Available filter fields:
147
+
148
+ | Field | Description |
149
+ |------------------------|--------------------------|
150
+ | `category` | Station category |
151
+ | `department` | Department |
152
+ | `municipality` | Municipality |
153
+ | `status` | Operational status |
154
+ | `owner` | Owning institution |
155
+ | `hydrographic_area` | Hydrographic area |
156
+ | `hydrographic_zone` | Hydrographic zone |
157
+ | `hydrographic_subzone` | Hydrographic subzone |
158
+
159
+ ---
160
+
161
+ ## Station
162
+
163
+ A `Station` is a **frozen dataclass** (read-only) with full metadata about an IDEAM monitoring station.
164
+
165
+ ```python
166
+ station = client.fetch_station("29037020")
167
+ print(station)
168
+ ```
169
+
170
+ Example output:
171
+
172
+ ```
173
+ Station CALAMAR: 29037020
174
+ Calamar (Bolivar)
175
+ Info: Activa Limnimétrica (Convencional)
176
+ Time: 1940-07-15 00:00:00 - ongoing
177
+ Owner: INSTITUTO DE HIDROLOGÍA METEOROLOGÍA Y ESTUDIOS AMBIENTALES
178
+ Location: altitude=8.00 [-74.915; 10.243]
179
+ Hydrographic: area=Magdalena Cauca zone=Bajo Magdalena subzone=Canal del Dique margen izquierda
180
+ Variables:
181
+ CAUDAL:
182
+ HIS_Q_MEDIA_D, HIS_Q_MX_M, HIS_Q_MEDIA_M, Q_MN_D, Q_MN_M, Q_MX_D, Q_MX_M, Q_MN_A, Q_MX_A, Q_MEDIA_A, CAUDAL_H
183
+ NIVEL:
184
+ NVLM_CON, HIS_NV_MEDIA_D, HIS_NV_MN_M, HIS_NV_MX_M, NIVEL_H, NV_MEDIA_D, NV_MN_D, NV_MN_M, NV_MX_D, NV_MX_M, NV_MN_A, NV_MX_A, NV_MEDIA_A, HIS_NIVEL_H
185
+ TM:
186
+ HIS_TR_QS_M, HIS_TR_QS_TT_M, HIS_TR_QS_MX_M, HIS_TR_KT/D_QS_D
187
+ ```
188
+
189
+ ### Station attributes
190
+
191
+ | Attribute | Description |
192
+ |---------------------|------------------------------------|
193
+ | `id` | Station code |
194
+ | `name` | Station name |
195
+ | `category` | Category (e.g. Limnimétrica) |
196
+ | `technology` | Technology (e.g. Convencional) |
197
+ | `status` | Operational status |
198
+ | `department` | Department |
199
+ | `municipality` | Municipality |
200
+ | `installation_date` | Date the station was installed |
201
+ | `suspension_date` | Date suspended (`None` if active) |
202
+ | `owner` | Owning institution |
203
+ | `location` | `Location` (altitude, lon, lat) |
204
+ | `hydrographic` | `Hydrographic` (area, zone, subzone)|
205
+ | `variables` | Dict of available `Variable` objects|
206
+
207
+ ### Checking variable availability
208
+
209
+ ```python
210
+ "CAUDAL@HIS_Q_MEDIA_D" in station # True / False
211
+ ```
212
+
213
+ ---
214
+
215
+ ## Fetching Data
216
+
217
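+ Variables are identified by a key in the format `PARAM@LABEL`, where `PARAM` is a parameter heading (e.g. `CAUDAL`) and `LABEL` is one of the labels listed under it when you print a station (e.g. `HIS_Q_MEDIA_D`), giving `CAUDAL@HIS_Q_MEDIA_D`.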
218
+
219
+ ### Using `fetch()`
220
+
221
+ ```python
222
+ dataset = station.fetch("CAUDAL@HIS_Q_MEDIA_D")
223
+ ```
224
+
225
+ ### Using `[]` (shorthand)
226
+
227
+ ```python
228
+ dataset = station["CAUDAL@HIS_Q_MEDIA_D"]
229
+ ```
230
+
231
+ Both return a `Dataset` object.
232
+
233
+ ---
234
+
235
+ ## Dataset
236
+
237
+ A `Dataset` represents a **time series associated with a station and a variable**.
238
+
239
+ ```python
240
+ dataset = station["NIVEL@NV_MEDIA_D"]
241
+
242
+ print(dataset.station) # Station object
243
+ print(dataset.variable) # Variable object (param, label, id)
244
+ print(dataset.data) # pandas DataFrame
245
+ ```
246
+
247
+ The `data` DataFrame has two columns:
248
+
249
+ | Column | Description |
250
+ |-------------|---------------------|
251
+ | `timestamp` | datetime |
252
+ | `value` | numeric measurement |
253
+
254
+ Example:
255
+
256
+ ```
257
+ timestamp value
258
+ 0 2025-01-01 2.31
259
+ 1 2025-01-02 2.28
260
+ 2 2025-01-03 2.35
261
+ ```
262
+
263
+ ---
264
+
265
+ ## Data Sources
266
+
267
+ ### Station Catalog
268
+
269
+ Fetched from the **Socrata SODA API** via Datos Abiertos Colombia:
270
+
271
+ ```
272
+ https://datos.gov.co/resource/hp9r-jxuu.json
273
+ ```
274
+
275
+ ### Time Series
276
+
277
+ Fetched from the **IDEAM Aquarius WebPortal**:
278
+
279
+ ```
280
+ http://aquariuswebportal.ideam.gov.co/
281
+ ```
282
+
283
+ Each variable key (`PARAM@LABEL`) corresponds to a unique Aquarius dataset ID used internally to retrieve the time series.
284
+
285
+ ---
286
+
287
+ *README made with [GitHub Copilot](https://github.com/features/copilot).*
288
+
@@ -0,0 +1,35 @@
1
+ [project]
2
+ name = "colombia-hydrodata"
3
+ version = "0.1.0"
4
+ description = "Python client to access hydrological and meteorological data from Colombia"
5
+ authors = [
6
+ {name = "Sebástian Narváez",email = "sebnarvaez19@outlook.com"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.14"
10
+ dependencies = [
11
+ "pandas (>=3.0.1,<4.0.0)",
12
+ "requests (>=2.32.5,<3.0.0)",
13
+ "aquarius-webportal (>=0.4.1,<1.0.0)",
14
+ "shapely (>=2.1.2,<3.0.0)",
15
+ "geopandas (>=1.1.3,<2.0.0)",
16
+ "platformdirs (>=4.9.4,<5.0.0)",
17
+ "pyarrow (>=23.0.1,<24.0.0)"
18
+ ]
19
+
20
+ [tool.poetry]
21
+ packages = [{include = "colombia_hydrodata", from = "src"}]
22
+
23
+ [build-system]
24
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
25
+ build-backend = "poetry.core.masonry.api"
26
+
27
+ [dependency-groups]
28
+ dev = [
29
+ "jupyterlab (>=4.5.6,<5.0.0)",
30
+ "matplotlib (>=3.10.8,<4.0.0)",
31
+ "openpyxl (>=3.1.5,<4.0.0)",
32
+ "mkdocs-material (>=9.7.6,<10.0.0)",
33
+ "mkdocstrings[python] (>=1.0.3,<2.0.0)",
34
+ "mkdocs-autorefs (>=1.4.4,<2.0.0)"
35
+ ]
@@ -0,0 +1,7 @@
1
+ from colombia_hydrodata.attributes import Hydrographic, Location, Variable
2
+ from colombia_hydrodata.client import Client
3
+ from colombia_hydrodata.dataset import Dataset
4
+ from colombia_hydrodata.filters import Filters
5
+ from colombia_hydrodata.station import Station
6
+
7
+ __all__ = ["Client", "Station", "Dataset", "Filters", "Location", "Hydrographic", "Variable"]