pymast 0.0.6__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymast/__init__.py +31 -2
- pymast/fish_history.py +59 -6
- pymast/formatter.py +886 -548
- pymast/logger.py +58 -0
- pymast/naive_bayes.py +116 -9
- pymast/overlap_removal.py +2327 -490
- pymast/parsers.py +1111 -239
- pymast/predictors.py +302 -116
- pymast/radio_project.py +1382 -512
- pymast/validation.py +224 -0
- pymast-1.0.0.dist-info/METADATA +636 -0
- pymast-1.0.0.dist-info/RECORD +15 -0
- {pymast-0.0.6.dist-info → pymast-1.0.0.dist-info}/WHEEL +1 -1
- pymast/table_merge.py +0 -154
- pymast-0.0.6.dist-info/METADATA +0 -19
- pymast-0.0.6.dist-info/RECORD +0 -14
- {pymast-0.0.6.dist-info → pymast-1.0.0.dist-info/licenses}/LICENSE.txt +0 -0
- {pymast-0.0.6.dist-info → pymast-1.0.0.dist-info}/top_level.txt +0 -0
pymast/validation.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Input validation utilities for MAST
|
|
3
|
+
"""
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
class ValidationError(Exception):
    """Exception raised when input data fails a validation check."""
|
|
10
|
+
|
|
11
|
+
def validate_tag_data(tag_data):
    """
    Validate that the master tag table has the required columns and values.

    Three checks run in order: every required column is present, no
    ``freq_code`` value is repeated, and every ``tag_type`` is one of the
    accepted values.  Expected dtypes are listed for reference only; they
    are not enforced here.

    Parameters
    ----------
    tag_data : pandas.DataFrame
        Master tag table to validate

    Raises
    ------
    ValidationError
        If required columns are missing, a freq_code is duplicated, or
        tag_type contains a value outside the accepted set

    Returns
    -------
    bool
        True if validation passes
    """
    # Required column -> expected dtype.  Only the keys are used below.
    required_columns = {
        'freq_code': 'object',
        'pulse_rate': 'float',
        'tag_type': 'object',
        'rel_date': 'datetime64',
        'cap_loc': 'object',
        'rel_loc': 'object',
    }

    # Check required columns exist (sorted so the message is deterministic)
    missing_cols = sorted(set(required_columns) - set(tag_data.columns))
    if missing_cols:
        raise ValidationError(
            f"Tag data missing required columns: {', '.join(missing_cols)}. "
            f"See docs/API_REFERENCE.md for required schema."
        )

    # Check for duplicates in freq_code
    duplicate_mask = tag_data['freq_code'].duplicated()
    if duplicate_mask.any():
        # Report at most five offenders; str() keeps the join from failing
        # with TypeError when codes are numeric or missing.
        duplicates = tag_data.loc[duplicate_mask, 'freq_code'].head(5).tolist()
        raise ValidationError(
            f"Duplicate freq_codes found in tag_data: {', '.join(map(str, duplicates))}. "
            f"Each freq_code must be unique."
        )

    # Check tag_type values
    valid_tag_types = ['study', 'BEACON', 'TEST']
    invalid_types = set(tag_data['tag_type'].unique()) - set(valid_tag_types)
    if invalid_types:
        # str() so NaN / non-string entries are reported instead of crashing.
        invalid_labels = sorted(str(value) for value in invalid_types)
        raise ValidationError(
            f"Invalid tag_type values found: {', '.join(invalid_labels)}. "
            f"Valid values: {', '.join(valid_tag_types)}"
        )

    return True
|
|
65
|
+
|
|
66
|
+
def validate_receiver_data(receiver_data):
    """
    Validate that the master receiver table has the required columns and values.

    Three checks run in order: every required column is present, no
    ``rec_id`` value is repeated, and every ``rec_type`` is one of the
    accepted receiver types.

    Parameters
    ----------
    receiver_data : pandas.DataFrame
        Master receiver table to validate

    Raises
    ------
    ValidationError
        If required columns are missing, a rec_id is duplicated, or
        rec_type contains a value outside the accepted set

    Returns
    -------
    bool
        True if validation passes
    """
    required_columns = ['rec_id', 'rec_type', 'node']

    # Sorted so the error message is deterministic
    missing_cols = sorted(set(required_columns) - set(receiver_data.columns))
    if missing_cols:
        raise ValidationError(
            f"Receiver data missing required columns: {', '.join(missing_cols)}. "
            f"See docs/API_REFERENCE.md for required schema."
        )

    # Check for duplicate rec_id
    duplicate_mask = receiver_data['rec_id'].duplicated()
    if duplicate_mask.any():
        duplicates = receiver_data.loc[duplicate_mask, 'rec_id'].tolist()
        # str() keeps the join from failing with TypeError on numeric ids.
        raise ValidationError(
            f"Duplicate rec_id found in receiver_data: {', '.join(map(str, duplicates))}. "
            f"Each rec_id must be unique."
        )

    # Check receiver types
    valid_rec_types = ['srx600', 'srx800', 'srx1200', 'orion', 'ares', 'VR2']
    invalid_types = set(receiver_data['rec_type'].unique()) - set(valid_rec_types)
    if invalid_types:
        # str() so NaN / non-string entries are reported instead of crashing.
        invalid_labels = sorted(str(value) for value in invalid_types)
        raise ValidationError(
            f"Invalid rec_type values found: {', '.join(invalid_labels)}. "
            f"Valid values: {', '.join(valid_rec_types)}"
        )

    return True
|
|
112
|
+
|
|
113
|
+
def validate_nodes_data(nodes_data):
    """
    Validate that the network nodes table has the required columns.

    ``None`` is accepted because the nodes table is optional.  Otherwise the
    table must contain the required columns and no repeated ``node`` value.

    Parameters
    ----------
    nodes_data : pandas.DataFrame or None
        Network nodes table to validate

    Raises
    ------
    ValidationError
        If required columns are missing or a node ID is duplicated

    Returns
    -------
    bool
        True if validation passes (including when nodes_data is None)
    """
    if nodes_data is None:
        return True  # Nodes are optional

    required_columns = ['node', 'X', 'Y']

    # Sorted so the error message is deterministic
    missing_cols = sorted(set(required_columns) - set(nodes_data.columns))
    if missing_cols:
        raise ValidationError(
            f"Nodes data missing required columns: {', '.join(missing_cols)}. "
            f"See docs/API_REFERENCE.md for required schema."
        )

    # Check for duplicate nodes
    duplicate_mask = nodes_data['node'].duplicated()
    if duplicate_mask.any():
        duplicates = nodes_data.loc[duplicate_mask, 'node'].tolist()
        # str() keeps the join from failing with TypeError on numeric node IDs.
        raise ValidationError(
            f"Duplicate node IDs found: {', '.join(map(str, duplicates))}. "
            f"Each node must be unique."
        )

    return True
|
|
153
|
+
|
|
154
|
+
def validate_project_dir(project_dir):
    """
    Validate project directory path.

    A path containing spaces only triggers a warning; a path longer than
    200 characters is rejected.

    Parameters
    ----------
    project_dir : str or os.PathLike
        Path to project directory

    Raises
    ------
    ValidationError
        If the path is longer than 200 characters

    Warns
    -----
    UserWarning
        If the path contains spaces

    Returns
    -------
    bool
        True if validation passes
    """
    import warnings

    # Normalise to str so pathlib.Path (and other PathLike) inputs work with
    # the substring and length checks below.
    path_text = os.fsdecode(project_dir)

    # Check for spaces (recommended against but not fatal)
    if ' ' in path_text:
        warnings.warn(
            "Project directory path contains spaces. "
            "This may cause issues on some systems. "
            "Consider using underscores instead.",
            stacklevel=2,  # attribute the warning to the caller
        )

    # Check path length (Windows limitation)
    if len(path_text) > 200:
        raise ValidationError(
            f"Project directory path is too long ({len(path_text)} characters). "
            f"Maximum recommended length: 200 characters."
        )

    return True
|
|
190
|
+
|
|
191
|
+
def validate_file_exists(file_path, file_description="File"):
    """
    Check that a path refers to an existing, readable file.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path to file
    file_description : str
        Description of file for error message

    Raises
    ------
    FileNotFoundError
        If the path doesn't exist, or exists but is not a file
        (for example, a directory)
    PermissionError
        If file exists but isn't readable

    Returns
    -------
    bool
        True if file exists and is readable
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(
            f"{file_description} not found: {file_path}"
        )

    # A directory satisfies os.path.exists but cannot be opened as a file,
    # so reject it here rather than letting a later read fail.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(
            f"{file_description} is not a file: {file_path}"
        )

    if not os.access(file_path, os.R_OK):
        raise PermissionError(
            f"{file_description} exists but is not readable: {file_path}"
        )

    return True
|