pydartdiags 0.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
pydartdiags/data.py ADDED
@@ -0,0 +1,246 @@
1
+ """
2
+ Data file utilities for pyDARTdiags examples.
3
+
4
+ This module provides functions to locate, download,
5
+ and cache example files used in pyDARTdiags examples.
6
+ The data files are cached in the user's home directory
7
+ under ~/.pydartdiags/data.
8
+
9
+ """
10
+
11
+ import os
12
+ from pathlib import Path
13
+ import urllib.request
14
+ import zipfile
15
+ import shutil
16
+
17
# Zenodo record that hosts the example-data archive. Both the files-archive
# API URL and the DOI link are derived from the record ID.
ZENODO_RECORD_ID = "18135062"
ZENODO_RECORD_URL = (
    "https://zenodo.org/api/records/" + ZENODO_RECORD_ID + "/files-archive"
)
ZENODO_DOI = "https://doi.org/10.5281/zenodo." + ZENODO_RECORD_ID
21
+
22
+
23
def get_data_cache_dir():
    """Return the directory used to cache downloaded data files.

    The location is ``.pydartdiags/data`` under the current user's home
    directory. The directory is only computed here, not created.
    """
    return Path.home().joinpath(".pydartdiags", "data")
27
+
28
+
29
def get_example_data(filename, auto_download=True):
    """
    Get path to a data file, checking multiple locations.

    Searches for data files in the following order:
        1. Development location (``data`` directory three levels above
           this module, i.e. the package GitHub repo checkout)
        2. Environment variable PYDARTDIAGS_DATA
        3. User cache directory (~/.pydartdiags/data)
        4. Downloads from Zenodo if auto_download=True

    Parameters
    ----------
    filename : str
        Name of the data file to locate
    auto_download : bool, optional
        If True, automatically download from Zenodo if not found locally.
        Default is True.

    Returns
    -------
    str
        Path to the data file

    Raises
    ------
    FileNotFoundError
        If the file is not found and auto_download=False, or if the file
        is still missing from the cache after the archive was downloaded.

    Examples
    --------

    .. code-block:: python

        data_file = get_example_data("obs_seq.final.lorenz_96")

    """
    # 1. Check development location (for contributors/developers).
    # This lookup is best-effort: __file__ may be undefined in some
    # execution contexts and the path may be unreadable, so only those
    # failures are ignored instead of swallowing every exception.
    try:
        dev_data = Path(__file__).parent.parent.parent / "data" / filename
        if dev_data.exists():
            print(f"Using development data file: {dev_data}")
            return str(dev_data)
    except (NameError, OSError):
        pass

    # 2. Check environment variable
    env_dir = os.environ.get("PYDARTDIAGS_DATA")
    if env_dir is not None:
        env_data = Path(env_dir) / filename
        if env_data.exists():
            print(f"Using data file from PYDARTDIAGS_DATA: {env_data}")
            return str(env_data)

    # 3. Check cache directory
    cache_dir = get_data_cache_dir()
    cache_file = cache_dir / filename
    if cache_file.exists():
        print(f"Using cached data file: {cache_file}")
        return str(cache_file)

    # 4. File not found
    if not auto_download:
        raise FileNotFoundError(
            f"Data file '{filename}' not found.\n\n"
            f"To download example data:\n"
            f"  Option 1: Automatic download\n"
            f"    >>> from pydartdiags.data import download_all_data\n"
            f"    >>> download_all_data()\n\n"
            f"  Option 2: Manual download\n"
            f"    Download from: {ZENODO_DOI}\n"
            f"    Extract to: {cache_dir}\n\n"
            f"  Option 3: Set environment variable\n"
            f"    export PYDARTDIAGS_DATA=/path/to/your/data\n"
        )

    # Auto-download. The requested file is known to be missing from the
    # cache, so force the download: without force=True an existing (stale,
    # partial or empty) cache directory makes download_all_data() return
    # without fetching anything.
    print(f"Data file '{filename}' not found locally.")
    print("Downloading all example data from Zenodo...")
    download_all_data(force=True)

    # Check again after download
    if not cache_file.exists():
        raise FileNotFoundError(
            f"Downloaded data but '{filename}' still not found. "
            f"Please check {ZENODO_RECORD_URL}"
        )
    return str(cache_file)
118
+
119
+
120
def download_all_data(force=False):
    """
    Download all example data files from Zenodo.

    Downloads and extracts the complete data archive to the user's
    cache directory (~/.pydartdiags/data). The downloaded zip archive is
    stored next to the cache directory and removed afterwards, whether or
    not the download and extraction succeeded.

    Parameters
    ----------
    force : bool, optional
        If True, re-download even if data already exists. Default is False.

    Raises
    ------
    Exception
        Any error raised while downloading or extracting the archive is
        re-raised after manual download instructions have been printed.

    Examples
    --------

    .. code-block:: python

        from pydartdiags.data import download_all_data
        download_all_data()

    """
    cache_dir = get_data_cache_dir()

    # Only treat the cache as populated when it actually contains something.
    # The directory is created before the download starts, so a failed
    # attempt leaves an empty directory behind; it must not block a retry.
    if not force and cache_dir.is_dir() and any(cache_dir.iterdir()):
        print(f"Data directory already exists: {cache_dir}")
        print("Use force=True to re-download.")
        return

    cache_dir.mkdir(parents=True, exist_ok=True)

    # Download archive
    archive_file = cache_dir.parent / f"{ZENODO_RECORD_ID}.zip"

    print(f"Downloading data from Zenodo ({ZENODO_DOI})...")
    print("This may take a few minutes (approx. 85 MB)...")

    try:
        urllib.request.urlretrieve(ZENODO_RECORD_URL, archive_file)
        print(f"Download complete: {archive_file}")

        # Extract archive
        print(f"Extracting to {cache_dir}...")
        with zipfile.ZipFile(archive_file, "r") as zip_ref:
            zip_ref.extractall(path=cache_dir)
    except Exception as e:
        print(f"Error downloading data: {e}")
        print("\nManual download instructions:")
        print(f"1. Download from: {ZENODO_DOI}")
        print(f"2. Extract to: {cache_dir}")
        raise
    finally:
        # Clean up archive, including a partial file left by a failed download
        if archive_file.exists():
            archive_file.unlink()

    print(f"Data successfully installed to {cache_dir}")
    print(f"Found {sum(1 for _ in cache_dir.iterdir())} data files")
177
+
178
+
179
def list_available_data():
    """
    List all available data files.

    Collects the names of regular files found directly inside each of
    these locations that exists: the development ``data`` directory
    (three levels above this module), the directory named by the
    PYDARTDIAGS_DATA environment variable, and the user cache directory.
    Names present in more than one location are reported once.

    Returns
    -------
    list of str
        Sorted list of available data file names

    Examples
    --------

    .. code-block:: python

        from pydartdiags.data import list_available_data
        files = list_available_data()
        print(files)

    """
    locations = []

    # Check development location. Best-effort: ignore only an undefined
    # __file__ or a filesystem error rather than every possible exception.
    try:
        dev_data = Path(__file__).parent.parent.parent / "data"
        if dev_data.exists():
            locations.append(dev_data)
    except (NameError, OSError):
        pass

    # Check environment variable
    if "PYDARTDIAGS_DATA" in os.environ:
        env_data = Path(os.environ["PYDARTDIAGS_DATA"])
        if env_data.exists():
            locations.append(env_data)

    # Check cache
    cache_dir = get_data_cache_dir()
    if cache_dir.exists():
        locations.append(cache_dir)

    # Collect all files (a set de-duplicates names shared between locations)
    files = set()
    for loc in locations:
        files.update(f.name for f in loc.glob("*") if f.is_file())

    return sorted(files)
226
+
227
+
228
def clear_cache():
    """
    Delete the cache directory and everything downloaded into it.

    Prints a message saying whether the cache was removed or did not
    exist in the first place.

    Examples
    --------

    .. code-block:: python

        from pydartdiags.data import clear_cache
        clear_cache()

    """
    target = get_data_cache_dir()

    # Nothing to remove: report and stop early.
    if not target.exists():
        print("Cache directory does not exist.")
        return

    shutil.rmtree(target)
    print(f"Cleared cache: {target}")
File without changes