pymast 0.0.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pymast/validation.py ADDED
@@ -0,0 +1,224 @@
1
+ """
2
+ Input validation utilities for MAST
3
+ """
4
+ import pandas as pd
5
+ import os
6
+
7
class ValidationError(Exception):
    """Exception raised by the validators in this module when a check fails."""
10
+
11
def validate_tag_data(tag_data):
    """
    Validate the master tag table: required columns, unique codes, tag types.

    Three checks are performed, in order: every required column is present,
    ``freq_code`` contains no duplicates, and every ``tag_type`` value is one
    of ``'study'``, ``'BEACON'`` or ``'TEST'``.

    Parameters
    ----------
    tag_data : pandas.DataFrame
        Master tag table to validate

    Raises
    ------
    ValidationError
        If required columns are missing, ``freq_code`` values are duplicated,
        or ``tag_type`` contains a value outside the allowed set

    Returns
    -------
    bool
        True if validation passes

    Notes
    -----
    The dtypes listed next to each required column are the expected schema
    only; this function does not enforce column dtypes.
    """
    # Expected dtype per column (reference only -- not enforced below).
    required_columns = {
        'freq_code': 'object',
        'pulse_rate': 'float',
        'tag_type': 'object',
        'rel_date': 'datetime64',
        'cap_loc': 'object',
        'rel_loc': 'object'
    }

    # Check required columns exist (sorted so the message is deterministic)
    missing_cols = set(required_columns) - set(tag_data.columns)
    if missing_cols:
        raise ValidationError(
            f"Tag data missing required columns: {', '.join(sorted(missing_cols))}. "
            f"See docs/API_REFERENCE.md for required schema."
        )

    # Check for duplicates in freq_code
    freq_codes = tag_data['freq_code']
    duplicate_mask = freq_codes.duplicated()
    if duplicate_mask.any():
        # Show at most five offenders; str() keeps non-string codes (e.g.
        # floats read from a spreadsheet) from breaking the message itself.
        duplicates = freq_codes[duplicate_mask].tolist()[:5]
        shown = ', '.join(str(code) for code in duplicates)
        raise ValidationError(
            f"Duplicate freq_codes found in tag_data: {shown}. "
            f"Each freq_code must be unique."
        )

    # Check tag_type values
    valid_tag_types = ['study', 'BEACON', 'TEST']
    invalid_types = set(tag_data['tag_type'].unique()) - set(valid_tag_types)
    if invalid_types:
        # Missing values (NaN/None) land here too, hence the str() conversion.
        shown = ', '.join(sorted(str(value) for value in invalid_types))
        raise ValidationError(
            f"Invalid tag_type values found: {shown}. "
            f"Valid values: {', '.join(valid_tag_types)}"
        )

    return True
65
+
66
def validate_receiver_data(receiver_data):
    """
    Validate the master receiver table: required columns, unique IDs, types.

    Three checks are performed, in order: the ``rec_id``, ``rec_type`` and
    ``node`` columns are present, ``rec_id`` contains no duplicates, and every
    ``rec_type`` value is one of the supported receiver types.

    Parameters
    ----------
    receiver_data : pandas.DataFrame
        Master receiver table to validate

    Raises
    ------
    ValidationError
        If required columns are missing, ``rec_id`` values are duplicated,
        or ``rec_type`` contains an unsupported value

    Returns
    -------
    bool
        True if validation passes
    """
    required_columns = ['rec_id', 'rec_type', 'node']

    # Sorted so the error message is deterministic across runs
    missing_cols = set(required_columns) - set(receiver_data.columns)
    if missing_cols:
        raise ValidationError(
            f"Receiver data missing required columns: {', '.join(sorted(missing_cols))}. "
            f"See docs/API_REFERENCE.md for required schema."
        )

    # Check for duplicate rec_id
    rec_ids = receiver_data['rec_id']
    duplicate_mask = rec_ids.duplicated()
    if duplicate_mask.any():
        # str() so integer receiver IDs do not break message formatting
        shown = ', '.join(str(rec_id) for rec_id in rec_ids[duplicate_mask].tolist())
        raise ValidationError(
            f"Duplicate rec_id found in receiver_data: {shown}. "
            f"Each rec_id must be unique."
        )

    # Check receiver types
    valid_rec_types = ['srx600', 'srx800', 'srx1200', 'orion', 'ares', 'VR2']
    invalid_types = set(receiver_data['rec_type'].unique()) - set(valid_rec_types)
    if invalid_types:
        # Missing values (NaN/None) are reported as invalid rather than crashing
        shown = ', '.join(sorted(str(value) for value in invalid_types))
        raise ValidationError(
            f"Invalid rec_type values found: {shown}. "
            f"Valid values: {', '.join(valid_rec_types)}"
        )

    return True
112
+
113
def validate_nodes_data(nodes_data):
    """
    Validate the network nodes table: required columns and unique node IDs.

    Parameters
    ----------
    nodes_data : pandas.DataFrame or None
        Network nodes table to validate. ``None`` is accepted because the
        nodes table is optional.

    Raises
    ------
    ValidationError
        If required columns are missing or ``node`` values are duplicated

    Returns
    -------
    bool
        True if validation passes (including when ``nodes_data`` is None)
    """
    if nodes_data is None:
        return True  # Nodes are optional

    required_columns = ['node', 'X', 'Y']

    # Sorted so the error message is deterministic across runs
    missing_cols = set(required_columns) - set(nodes_data.columns)
    if missing_cols:
        raise ValidationError(
            f"Nodes data missing required columns: {', '.join(sorted(missing_cols))}. "
            f"See docs/API_REFERENCE.md for required schema."
        )

    # Check for duplicate nodes
    node_ids = nodes_data['node']
    duplicate_mask = node_ids.duplicated()
    if duplicate_mask.any():
        # str() so numeric node IDs do not break message formatting
        shown = ', '.join(str(node) for node in node_ids[duplicate_mask].tolist())
        raise ValidationError(
            f"Duplicate node IDs found: {shown}. "
            f"Each node must be unique."
        )

    return True
153
+
154
def validate_project_dir(project_dir):
    """
    Validate project directory path.

    Parameters
    ----------
    project_dir : str or os.PathLike
        Path to project directory

    Warns
    -----
    UserWarning
        If the path contains spaces (discouraged, but not treated as an error)

    Raises
    ------
    ValidationError
        If the path is longer than 200 characters

    Returns
    -------
    bool
        True if validation passes
    """
    import warnings

    # Accept pathlib.Path and other path-like objects as well as plain strings;
    # the checks below need a real string for `in` and len().
    path_text = os.fspath(project_dir)

    # Check for spaces (recommended against but not fatal)
    if ' ' in path_text:
        warnings.warn(
            "Project directory path contains spaces. "
            "This may cause issues on some systems. "
            "Consider using underscores instead.",
            stacklevel=2,  # attribute the warning to the caller's line
        )

    # Check path length (Windows limitation)
    if len(path_text) > 200:
        raise ValidationError(
            f"Project directory path is too long ({len(path_text)} characters). "
            f"Maximum recommended length: 200 characters."
        )

    return True
190
+
191
def validate_file_exists(file_path, file_description="File"):
    """
    Confirm that a path exists and can be read by the current process.

    Parameters
    ----------
    file_path : str
        Path to check
    file_description : str
        Label placed at the start of the error message (default ``"File"``)

    Raises
    ------
    FileNotFoundError
        If ``os.path.exists`` reports the path as absent
    PermissionError
        If the path exists but ``os.access(..., os.R_OK)`` is false

    Returns
    -------
    bool
        True when both checks pass
    """
    path_is_present = os.path.exists(file_path)
    if not path_is_present:
        message = f"{file_description} not found: {file_path}"
        raise FileNotFoundError(message)

    can_read = os.access(file_path, os.R_OK)
    if not can_read:
        message = f"{file_description} exists but is not readable: {file_path}"
        raise PermissionError(message)

    return True