isd-fetch 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isd_fetch-0.1.0/LICENSE +28 -0
- isd_fetch-0.1.0/PKG-INFO +99 -0
- isd_fetch-0.1.0/README.md +55 -0
- isd_fetch-0.1.0/isd_fetch.egg-info/PKG-INFO +99 -0
- isd_fetch-0.1.0/isd_fetch.egg-info/SOURCES.txt +11 -0
- isd_fetch-0.1.0/isd_fetch.egg-info/dependency_links.txt +1 -0
- isd_fetch-0.1.0/isd_fetch.egg-info/requires.txt +2 -0
- isd_fetch-0.1.0/isd_fetch.egg-info/top_level.txt +1 -0
- isd_fetch-0.1.0/pyisd/__init__.py +3 -0
- isd_fetch-0.1.0/pyisd/_isd_lite.py +111 -0
- isd_fetch-0.1.0/pyproject.toml +28 -0
- isd_fetch-0.1.0/setup.cfg +4 -0
- isd_fetch-0.1.0/tests/test_isd_lite.py +12 -0
isd_fetch-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024, Cyril
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
isd_fetch-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: isd-fetch
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python package for manipulating and analyzing ISD data.
|
|
5
|
+
Author: Cyril Joly
|
|
6
|
+
License: BSD 3-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024, Cyril
|
|
9
|
+
|
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
|
11
|
+
modification, are permitted provided that the following conditions are met:
|
|
12
|
+
|
|
13
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
14
|
+
list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
|
18
|
+
and/or other materials provided with the distribution.
|
|
19
|
+
|
|
20
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
21
|
+
contributors may be used to endorse or promote products derived from
|
|
22
|
+
this software without specific prior written permission.
|
|
23
|
+
|
|
24
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
25
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
26
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
27
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
28
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
29
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
30
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
31
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
32
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
|
|
35
|
+
Keywords: isd,noaa
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
38
|
+
Classifier: Operating System :: OS Independent
|
|
39
|
+
Requires-Python: >=3.8
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Requires-Dist: geopandas
|
|
43
|
+
Requires-Dist: tqdm
|
|
44
|
+
|
|
45
|
+
# PyISD: A Python Package for NOAA's ISD Lite Dataset
|
|
46
|
+
|
|
47
|
+
**PyISD** is a Python package designed for loading and processing NOAA's ISD Lite dataset. The dataset, as described by NOAA, is a streamlined version of the full Integrated Surface Database (ISD). It includes eight common surface parameters in a fixed-width format, free of duplicate values, sub-hourly data, and complicated flags, making it suitable for general research and scientific purposes. For more details, visit the [official ISD homepage](https://www.ncei.noaa.gov/products/land-based-station/integrated-surface-database).
|
|
48
|
+
|
|
49
|
+
## **Features**
|
|
50
|
+
- Load and query the ISD Lite dataset with ease.
|
|
51
|
+
- Retrieve and process metadata for stations worldwide.
|
|
52
|
+
- Filter data based on spatial and temporal constraints.
|
|
53
|
+
|
|
54
|
+
## **Example Usage**
|
|
55
|
+
|
|
56
|
+
### **1. Importing and Loading Metadata**
|
|
57
|
+
You can start by importing the `IsdLite` module, fetching metadata for weather stations worldwide and displaying a sample of the station metadata:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from pyisd import IsdLite
|
|
61
|
+
|
|
62
|
+
CRS = 4326
|
|
63
|
+
|
|
64
|
+
module = IsdLite(crs=CRS, verbose=True)
|
|
65
|
+
module.raw_metadata.sample(5)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
The output displays station metadata including station name, latitude, longitude, elevation, and the period of available records:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
USAF WBAN STATION NAME CTRY ST CALL LAT LON \
|
|
72
|
+
8480 377350 99999 GANDJA AJ NaN NaN 40.717 46.417
|
|
73
|
+
1023 027710 99999 JOUTSA LEIVONMAKI SAVEN FI NaN NaN 61.883 26.100
|
|
74
|
+
11880 545340 99999 TANGSHAN CH NaN NaN 39.650 118.100
|
|
75
|
+
3795 111900 99999 EISENSTADT AU NaN NaN 47.850 16.533
|
|
76
|
+
26693 957119 99999 WEST WYALONG AIRPORT AS NaN NaN -33.930 147.200
|
|
77
|
+
|
|
78
|
+
ELEV(M) BEGIN END x y geometry
|
|
79
|
+
8480 309.0 19320101 20241117 46.417 40.717 POINT (46.417 40.717)
|
|
80
|
+
1023 146.0 20080115 20241112 26.100 61.883 POINT (26.1 61.883)
|
|
81
|
+
11880 29.0 19560820 20241112 118.100 39.650 POINT (118.1 39.65)
|
|
82
|
+
3795 189.3 19730627 20241117 16.533 47.850 POINT (16.533 47.85)
|
|
83
|
+
26693 262.0 19651231 19840629 147.200 -33.930 POINT (147.2 -33.93)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### **2. Fetching and Visualizing Data**
|
|
87
|
+
To retrieve data, you can specify the time period and spatial constraints. Here, we fetch temperature data (`temp`) for the bounding box around Paris between January 1, 2023, and November 20, 2024:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from pyisd.misc import get_box
|
|
91
|
+
|
|
92
|
+
geometry = get_box(place='Paris', width=1., crs=CRS)
|
|
93
|
+
|
|
94
|
+
data = module.get_data(start=20230101, end=20241120, geometry=geometry, organize_by='field')
|
|
95
|
+
|
|
96
|
+
data['temp'].plot(figsize=(10, 4), legend=False, c='grey', lw=0.6)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+

|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# PyISD: A Python Package for NOAA's ISD Lite Dataset
|
|
2
|
+
|
|
3
|
+
**PyISD** is a Python package designed for loading and processing NOAA's ISD Lite dataset. The dataset, as described by NOAA, is a streamlined version of the full Integrated Surface Database (ISD). It includes eight common surface parameters in a fixed-width format, free of duplicate values, sub-hourly data, and complicated flags, making it suitable for general research and scientific purposes. For more details, visit the [official ISD homepage](https://www.ncei.noaa.gov/products/land-based-station/integrated-surface-database).
|
|
4
|
+
|
|
5
|
+
## **Features**
|
|
6
|
+
- Load and query the ISD Lite dataset with ease.
|
|
7
|
+
- Retrieve and process metadata for stations worldwide.
|
|
8
|
+
- Filter data based on spatial and temporal constraints.
|
|
9
|
+
|
|
10
|
+
## **Example Usage**
|
|
11
|
+
|
|
12
|
+
### **1. Importing and Loading Metadata**
|
|
13
|
+
You can start by importing the `IsdLite` class, instantiating it to fetch metadata for weather stations worldwide, and displaying a sample of the station metadata:
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from pyisd import IsdLite
|
|
17
|
+
|
|
18
|
+
CRS = 4326
|
|
19
|
+
|
|
20
|
+
module = IsdLite(crs=CRS, verbose=True)
|
|
21
|
+
module.raw_metadata.sample(5)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The output displays station metadata including station name, latitude, longitude, elevation, and the period of available records:
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
USAF WBAN STATION NAME CTRY ST CALL LAT LON \
|
|
28
|
+
8480 377350 99999 GANDJA AJ NaN NaN 40.717 46.417
|
|
29
|
+
1023 027710 99999 JOUTSA LEIVONMAKI SAVEN FI NaN NaN 61.883 26.100
|
|
30
|
+
11880 545340 99999 TANGSHAN CH NaN NaN 39.650 118.100
|
|
31
|
+
3795 111900 99999 EISENSTADT AU NaN NaN 47.850 16.533
|
|
32
|
+
26693 957119 99999 WEST WYALONG AIRPORT AS NaN NaN -33.930 147.200
|
|
33
|
+
|
|
34
|
+
ELEV(M) BEGIN END x y geometry
|
|
35
|
+
8480 309.0 19320101 20241117 46.417 40.717 POINT (46.417 40.717)
|
|
36
|
+
1023 146.0 20080115 20241112 26.100 61.883 POINT (26.1 61.883)
|
|
37
|
+
11880 29.0 19560820 20241112 118.100 39.650 POINT (118.1 39.65)
|
|
38
|
+
3795 189.3 19730627 20241117 16.533 47.850 POINT (16.533 47.85)
|
|
39
|
+
26693 262.0 19651231 19840629 147.200 -33.930 POINT (147.2 -33.93)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### **2. Fetching and Visualizing Data**
|
|
43
|
+
To retrieve data, you can specify the time period and spatial constraints. Here, we fetch data for the bounding box around Paris between January 1, 2023, and November 20, 2024, and plot the temperature field (`temp`):
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
from pyisd.misc import get_box
|
|
47
|
+
|
|
48
|
+
geometry = get_box(place='Paris', width=1., crs=CRS)
|
|
49
|
+
|
|
50
|
+
data = module.get_data(start=20230101, end=20241120, geometry=geometry, organize_by='field')
|
|
51
|
+
|
|
52
|
+
data['temp'].plot(figsize=(10, 4), legend=False, c='grey', lw=0.6)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+

|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: isd-fetch
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python package for manipulating and analyzing ISD data.
|
|
5
|
+
Author: Cyril Joly
|
|
6
|
+
License: BSD 3-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024, Cyril
|
|
9
|
+
|
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
|
11
|
+
modification, are permitted provided that the following conditions are met:
|
|
12
|
+
|
|
13
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
14
|
+
list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
|
18
|
+
and/or other materials provided with the distribution.
|
|
19
|
+
|
|
20
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
21
|
+
contributors may be used to endorse or promote products derived from
|
|
22
|
+
this software without specific prior written permission.
|
|
23
|
+
|
|
24
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
25
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
26
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
27
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
28
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
29
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
30
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
31
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
32
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
|
|
35
|
+
Keywords: isd,noaa
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
38
|
+
Classifier: Operating System :: OS Independent
|
|
39
|
+
Requires-Python: >=3.8
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Requires-Dist: geopandas
|
|
43
|
+
Requires-Dist: tqdm
|
|
44
|
+
|
|
45
|
+
# PyISD: A Python Package for NOAA's ISD Lite Dataset
|
|
46
|
+
|
|
47
|
+
**PyISD** is a Python package designed for loading and processing NOAA's ISD Lite dataset. The dataset, as described by NOAA, is a streamlined version of the full Integrated Surface Database (ISD). It includes eight common surface parameters in a fixed-width format, free of duplicate values, sub-hourly data, and complicated flags, making it suitable for general research and scientific purposes. For more details, visit the [official ISD homepage](https://www.ncei.noaa.gov/products/land-based-station/integrated-surface-database).
|
|
48
|
+
|
|
49
|
+
## **Features**
|
|
50
|
+
- Load and query the ISD Lite dataset with ease.
|
|
51
|
+
- Retrieve and process metadata for stations worldwide.
|
|
52
|
+
- Filter data based on spatial and temporal constraints.
|
|
53
|
+
|
|
54
|
+
## **Example Usage**
|
|
55
|
+
|
|
56
|
+
### **1. Importing and Loading Metadata**
|
|
57
|
+
You can start by importing the `IsdLite` module, fetching metadata for weather stations worldwide and displaying a sample of the station metadata:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from pyisd import IsdLite
|
|
61
|
+
|
|
62
|
+
CRS = 4326
|
|
63
|
+
|
|
64
|
+
module = IsdLite(crs=CRS, verbose=True)
|
|
65
|
+
module.raw_metadata.sample(5)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
The output displays station metadata including station name, latitude, longitude, elevation, and the period of available records:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
USAF WBAN STATION NAME CTRY ST CALL LAT LON \
|
|
72
|
+
8480 377350 99999 GANDJA AJ NaN NaN 40.717 46.417
|
|
73
|
+
1023 027710 99999 JOUTSA LEIVONMAKI SAVEN FI NaN NaN 61.883 26.100
|
|
74
|
+
11880 545340 99999 TANGSHAN CH NaN NaN 39.650 118.100
|
|
75
|
+
3795 111900 99999 EISENSTADT AU NaN NaN 47.850 16.533
|
|
76
|
+
26693 957119 99999 WEST WYALONG AIRPORT AS NaN NaN -33.930 147.200
|
|
77
|
+
|
|
78
|
+
ELEV(M) BEGIN END x y geometry
|
|
79
|
+
8480 309.0 19320101 20241117 46.417 40.717 POINT (46.417 40.717)
|
|
80
|
+
1023 146.0 20080115 20241112 26.100 61.883 POINT (26.1 61.883)
|
|
81
|
+
11880 29.0 19560820 20241112 118.100 39.650 POINT (118.1 39.65)
|
|
82
|
+
3795 189.3 19730627 20241117 16.533 47.850 POINT (16.533 47.85)
|
|
83
|
+
26693 262.0 19651231 19840629 147.200 -33.930 POINT (147.2 -33.93)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### **2. Fetching and Visualizing Data**
|
|
87
|
+
To retrieve data, you can specify the time period and spatial constraints. Here, we fetch temperature data (`temp`) for the bounding box around Paris between January 1, 2023, and November 20, 2024:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from pyisd.misc import get_box
|
|
91
|
+
|
|
92
|
+
geometry = get_box(place='Paris', width=1., crs=CRS)
|
|
93
|
+
|
|
94
|
+
data = module.get_data(start=20230101, end=20241120, geometry=geometry, organize_by='field')
|
|
95
|
+
|
|
96
|
+
data['temp'].plot(figsize=(10, 4), legend=False, c='grey', lw=0.6)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+

|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
isd_fetch.egg-info/PKG-INFO
|
|
5
|
+
isd_fetch.egg-info/SOURCES.txt
|
|
6
|
+
isd_fetch.egg-info/dependency_links.txt
|
|
7
|
+
isd_fetch.egg-info/requires.txt
|
|
8
|
+
isd_fetch.egg-info/top_level.txt
|
|
9
|
+
pyisd/__init__.py
|
|
10
|
+
pyisd/_isd_lite.py
|
|
11
|
+
tests/test_isd_lite.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pyisd
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
2
|
+
from time import sleep
|
|
3
|
+
from urllib.parse import urljoin
|
|
4
|
+
|
|
5
|
+
import geopandas as gpd
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from tqdm.auto import tqdm
|
|
8
|
+
|
|
9
|
+
from .misc import check_params, daterange, proj, to_crs
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class IsdLite:
    """Download and assemble NOAA ISD-Lite observations for a set of stations.

    On construction the station catalogue at ``raw_metadata_url`` is downloaded
    and kept as a GeoDataFrame. ``get_data`` then fetches the per-station,
    per-year files found under ``data_url`` and returns them keyed either by
    station or by field.
    """

    # Fixed-width text file describing the stations (read with pd.read_fwf).
    raw_metadata_url = 'https://www.ncei.noaa.gov/pub/data/noaa/isd-history.txt'
    # Directory template holding one gzipped file per station and year.
    data_url = "https://www.ncei.noaa.gov/pub/data/noaa/isd-lite/{year}/"
    # Names assigned, in order, to the value columns that follow
    # year/month/day/hour in a data file.
    fields = ('temp', 'dewtemp', 'pressure', 'winddirection', 'windspeed', 'skycoverage', 'precipitation-1h', 'precipitation-6h')
    # Maximum number of attempts made to download the station catalogue.
    max_retries = 5

    def __init__(self, crs=4326, verbose=0):
        """Create a client and download the station catalogue.

        Args:
            crs: Coordinate reference system, passed through the package's
                ``to_crs`` helper; station coordinates are projected into it.
            verbose: When truthy, ``get_data`` shows a progress bar.
        """
        self.crs = to_crs(crs)
        self.verbose = verbose
        self._get_raw_metadata()

    @property
    def raw_metadata(self):
        """Station catalogue downloaded at construction time (GeoDataFrame)."""
        return self._raw_metadata

    def _get_raw_metadata(self):
        """Download the station catalogue into ``self._raw_metadata``.

        The download is attempted up to ``max_retries`` times, sleeping two
        seconds between failed attempts.

        Raises:
            RuntimeError: If every attempt failed; the last error is chained.
        """
        for attempt in range(self.max_retries):
            try:
                # skiprows=19 skips the lines preceding the column header
                # (presumably the explanatory preamble of isd-history.txt).
                metadata = pd.read_fwf(self.raw_metadata_url, skiprows=19)
                metadata = metadata.dropna(subset=['LAT', 'LON'])
                metadata['x'], metadata['y'] = proj(metadata['LON'], metadata['LAT'], 4326, self.crs)
                self._raw_metadata = gpd.GeoDataFrame(
                    metadata,
                    geometry=gpd.points_from_xy(metadata.x, metadata.y, crs=self.crs),
                )
                # Stop after the first success; without this the catalogue
                # would be downloaded again on every remaining iteration.
                return
            except Exception as e:
                if attempt < self.max_retries - 1:
                    sleep(2)
                else:
                    raise RuntimeError(f"Failed to download metadata after {self.max_retries} attempts.") from e

    def _filter_metadata(self, geometry):
        """Return the unique ``USAF`` identifiers of the selected stations.

        Args:
            geometry: Clip mask handed to ``gpd.clip``; ``None`` selects every
                station in the catalogue.
        """
        if geometry is None:
            return self._raw_metadata['USAF'].unique()
        return gpd.clip(self._raw_metadata, geometry)['USAF'].unique()

    @classmethod
    def _download_read(cls, url):
        """Read one ISD-Lite file into a DataFrame indexed by timestamp.

        Args:
            url: Anything ``pd.read_csv`` accepts (URL, path or file-like
                object) containing whitespace-separated columns.

        Returns:
            DataFrame whose columns are ``cls.fields`` and whose index is built
            from the year/month/day/hour columns. ``-9999`` is read as NaN.
        """
        time_features = ['year', 'month', 'day', 'hour']
        # NOTE(review): the ISD-Lite format description also lists a scaling
        # factor of 10 for both precipitation fields, which are left unscaled
        # here — confirm before changing.
        scaled_fields = ['temp', 'dewtemp', 'pressure', 'windspeed']

        df = pd.read_csv(url, sep='\\s+', header=None, na_values=-9999)
        df.columns = time_features + list(cls.fields)
        df[scaled_fields] = df[scaled_fields] / 10.
        df.index = pd.to_datetime(df[time_features])
        return df.drop(columns=time_features)

    @classmethod
    def _download_data_id(cls, usaf_id, years):
        """Download and concatenate the yearly files of one station.

        Args:
            usaf_id: Station identifier used to build the file name.
            years: Iterable of years to fetch.

        Returns:
            The concatenated yearly DataFrames, or an empty DataFrame when no
            file could be read.
        """
        # NOTE(review): the WBAN part of the file name is hard-coded to 99999,
        # so files of stations published under another WBAN are never found.
        ret = []
        for year in years:
            try:
                df = cls._download_read(urljoin(cls.data_url.format(year=year), f'{usaf_id}-99999-{year}.gz'))
            except Exception:
                # Best effort: a missing or unreadable year is skipped so that
                # the remaining years and stations are still returned.
                continue
            ret.append(df)

        if ret:
            return pd.concat(ret)
        return pd.DataFrame()

    def get_data(self, start, end=None, geometry=None, organize_by='location', n_jobs=8):
        """
        Fetches weather data from the ISD-Lite dataset for the specified time range and location.

        Args:
            start: The start date for the data retrieval, in any form accepted by the package's
                `daterange` helper (the README passes integers such as 20230101).
            end (optional): The end date for the data retrieval, forwarded to `daterange`.
                Defaults to None.
            geometry (GeoSeries, optional): A GeoSeries or geometry object to filter stations by spatial location.
                If None, data for all stations will be retrieved. Defaults to None.
            organize_by (str, optional): Determines how the resulting data is organized. Options are:
                - 'location': Organize data by weather station.
                - 'field': Organize data by weather variable.
                Defaults to 'location'.
            n_jobs (int, optional): The number of threads to use for parallel data downloads. Defaults to 8.

        Returns:
            dict: A dictionary containing the weather data. Stations for which nothing could be
            downloaded are omitted; the remaining ones keep the order returned by the metadata filter.
            The structure of the dictionary depends on the `organize_by` parameter:
                - If 'location': Keys are station IDs, and values are DataFrames with weather data.
                - If 'field': Keys are weather variables, and values are DataFrames with stations as columns
                  (DataFrames without columns when no station returned data).

        Raises:
            ValueError: If `organize_by` is not one of the allowed options (presumably raised by
                `check_params` — confirm).
        """
        check_params(param=organize_by, params=('field', 'location'))
        timestamps = daterange(start, end, freq='h')
        years = timestamps.year.unique()
        usaf_ids = self._filter_metadata(geometry=geometry)

        def fetch_data(usaf_id):
            frame = self._download_data_id(usaf_id=usaf_id, years=years)
            # Align every station on the same hourly index.
            return usaf_id, frame.reindex(index=timestamps)

        fetched = {}
        with ThreadPoolExecutor(max_workers=n_jobs) as executor:
            futures = [executor.submit(fetch_data, usaf_id) for usaf_id in usaf_ids]

            for future in tqdm(as_completed(futures), total=len(futures), disable=(not self.verbose)):
                usaf_id, data = future.result()
                # A station without any readable file yields a frame with no
                # columns, hence size 0.
                if data.size > 0:
                    fetched[usaf_id] = data

        # Futures complete in arbitrary order; restore the station order so the
        # result is reproducible between runs.
        ret = {usaf_id: fetched[usaf_id] for usaf_id in usaf_ids if usaf_id in fetched}

        if organize_by == 'field':
            if not ret:
                # pd.concat raises on an empty list; return empty frames instead.
                return {field: pd.DataFrame(index=timestamps) for field in self.fields}
            ret = {field: pd.concat([ret[usaf_id][field].rename(usaf_id) for usaf_id in ret], axis=1) for field in self.fields}

        return ret
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[tool.setuptools]
|
|
6
|
+
packages = ["pyisd"]
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "isd-fetch"
|
|
10
|
+
version = "0.1.0"
|
|
11
|
+
description = "A Python package for manipulating and analyzing ISD data."
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Cyril Joly"}
|
|
14
|
+
]
|
|
15
|
+
license = { file = "LICENSE" }
|
|
16
|
+
readme = "README.md"
|
|
17
|
+
keywords = ["isd", "noaa"]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"License :: OSI Approved :: BSD License",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
]
|
|
23
|
+
requires-python = ">=3.8"
|
|
24
|
+
|
|
25
|
+
dependencies = [
|
|
26
|
+
"geopandas",
|
|
27
|
+
"tqdm"
|
|
28
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from pyisd import IsdLite
|
|
2
|
+
from pyisd.misc import get_box
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_isdlite():
    """Smoke-test ``IsdLite.get_data`` with both ``organize_by`` layouts.

    Instantiating ``IsdLite`` and calling ``get_data`` download from NOAA, so
    this test needs network access.
    """
    # Search area around Paris (built by the package's ``get_box`` helper).
    paris_box = get_box(place='Paris', width=1., crs=4326)
    client = IsdLite()
    query = dict(start=20230101, end=20241231, geometry=paris_box)

    # Keyed by station: the first returned station must hold some values.
    by_station = client.get_data(organize_by='location', **query)
    first_station = list(by_station)[0]
    assert by_station[first_station].size > 0

    # Keyed by field: the temperature table must hold some values.
    by_field = client.get_data(organize_by='field', **query)
    assert by_field['temp'].size > 0
|