rdetoolkit 1.5.1__cp312-cp312-win_amd64.whl → 1.5.3__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdetoolkit/__init__.py +19 -1
- rdetoolkit/cli/validate.py +84 -55
- rdetoolkit/cmd/validate.py +18 -15
- rdetoolkit/config.py +211 -23
- rdetoolkit/core.cp312-win_amd64.pyd +0 -0
- rdetoolkit/exceptions.py +94 -0
- rdetoolkit/exceptions.pyi +21 -0
- rdetoolkit/invoicefile.py +11 -9
- rdetoolkit/models/metadata.py +97 -2
- rdetoolkit/models/metadata.pyi +22 -1
- rdetoolkit/processing/processors/invoice.py +41 -0
- rdetoolkit/validation.py +94 -8
- rdetoolkit/validation.pyi +5 -0
- rdetoolkit/workflows.py +8 -2
- {rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/METADATA +17 -9
- {rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/RECORD +19 -19
- {rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/WHEEL +0 -0
- {rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/entry_points.txt +0 -0
- {rdetoolkit-1.5.1.dist-info → rdetoolkit-1.5.3.dist-info}/licenses/LICENSE +0 -0
rdetoolkit/exceptions.py
CHANGED
|
@@ -176,3 +176,97 @@ class InvalidSearchParametersError(Exception):
|
|
|
176
176
|
def __init__(self, message: str = "Invalid search term") -> None:
|
|
177
177
|
self.message = message
|
|
178
178
|
super().__init__(self.message)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class ConfigError(Exception):
|
|
182
|
+
"""Exception raised for configuration file loading errors.
|
|
183
|
+
|
|
184
|
+
This exception provides structured, informative error messages for configuration
|
|
185
|
+
file failures, including file paths, error types, line/column information for
|
|
186
|
+
parse errors, and documentation links.
|
|
187
|
+
|
|
188
|
+
Attributes:
|
|
189
|
+
message: The error message describing what went wrong.
|
|
190
|
+
file_path: Path to the configuration file that failed to load.
|
|
191
|
+
error_type: Type of error (e.g., 'file_not_found', 'parse_error', 'validation_error').
|
|
192
|
+
line_number: Line number where error occurred (for parse errors).
|
|
193
|
+
column_number: Column number where error occurred (for parse errors).
|
|
194
|
+
field_name: Field name that failed validation (for validation errors).
|
|
195
|
+
doc_url: Documentation URL for help and troubleshooting.
|
|
196
|
+
|
|
197
|
+
Examples:
|
|
198
|
+
File not found error:
|
|
199
|
+
>>> raise ConfigError(
|
|
200
|
+
... "Configuration file not found",
|
|
201
|
+
... file_path="config.yaml",
|
|
202
|
+
... error_type="file_not_found"
|
|
203
|
+
... )
|
|
204
|
+
|
|
205
|
+
Parse error with line information:
|
|
206
|
+
>>> raise ConfigError(
|
|
207
|
+
... "Invalid YAML syntax: expected <block end>",
|
|
208
|
+
... file_path="config.yaml",
|
|
209
|
+
... error_type="parse_error",
|
|
210
|
+
... line_number=10,
|
|
211
|
+
... column_number=5
|
|
212
|
+
... )
|
|
213
|
+
|
|
214
|
+
Validation error with field information:
|
|
215
|
+
>>> raise ConfigError(
|
|
216
|
+
... "Invalid value for field",
|
|
217
|
+
... file_path="config.yaml",
|
|
218
|
+
... error_type="validation_error",
|
|
219
|
+
... field_name="system.extended_mode"
|
|
220
|
+
... )
|
|
221
|
+
"""
|
|
222
|
+
|
|
223
|
+
def __init__(
|
|
224
|
+
self,
|
|
225
|
+
message: str,
|
|
226
|
+
*,
|
|
227
|
+
file_path: str | None = None,
|
|
228
|
+
error_type: str = "unknown",
|
|
229
|
+
line_number: int | None = None,
|
|
230
|
+
column_number: int | None = None,
|
|
231
|
+
field_name: str | None = None,
|
|
232
|
+
doc_url: str = "https://nims-mdpf.github.io/rdetoolkit/usage/config/config/",
|
|
233
|
+
) -> None:
|
|
234
|
+
"""Initialize ConfigError with detailed information.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
message: The error message describing what went wrong.
|
|
238
|
+
file_path: Path to the configuration file that failed.
|
|
239
|
+
error_type: Type of error (e.g., 'file_not_found', 'parse_error', 'validation_error').
|
|
240
|
+
line_number: Line number where error occurred (for parse errors).
|
|
241
|
+
column_number: Column number where error occurred (for parse errors).
|
|
242
|
+
field_name: Field name that failed validation (for validation errors).
|
|
243
|
+
doc_url: Documentation URL for help and troubleshooting.
|
|
244
|
+
"""
|
|
245
|
+
self.message = message
|
|
246
|
+
self.file_path = file_path
|
|
247
|
+
self.error_type = error_type
|
|
248
|
+
self.line_number = line_number
|
|
249
|
+
self.column_number = column_number
|
|
250
|
+
self.field_name = field_name
|
|
251
|
+
self.doc_url = doc_url
|
|
252
|
+
|
|
253
|
+
# Build comprehensive error message
|
|
254
|
+
parts = []
|
|
255
|
+
if file_path:
|
|
256
|
+
parts.append(f"Configuration file: '{file_path}'")
|
|
257
|
+
|
|
258
|
+
parts.append(message)
|
|
259
|
+
|
|
260
|
+
if line_number is not None:
|
|
261
|
+
location = f"line {line_number}"
|
|
262
|
+
if column_number is not None:
|
|
263
|
+
location += f", column {column_number}"
|
|
264
|
+
parts.append(f"Location: {location}")
|
|
265
|
+
|
|
266
|
+
if field_name:
|
|
267
|
+
parts.append(f"Field: {field_name}")
|
|
268
|
+
|
|
269
|
+
parts.append(f"See: {doc_url}")
|
|
270
|
+
|
|
271
|
+
full_message = "\n".join(parts)
|
|
272
|
+
super().__init__(full_message)
|
rdetoolkit/exceptions.pyi
CHANGED
|
@@ -57,3 +57,24 @@ class NoResultsFoundError(Exception):
|
|
|
57
57
|
class InvalidSearchParametersError(Exception):
|
|
58
58
|
message: Incomplete
|
|
59
59
|
def __init__(self, message: str = 'Invalid search term') -> None: ...
|
|
60
|
+
|
|
61
|
+
class ConfigError(Exception):
    # Type stub: the instance attributes below share names and types with the
    # keyword-only arguments accepted by __init__.
    message: str
    file_path: str | None
    error_type: str
    line_number: int | None
    column_number: int | None
    field_name: str | None
    doc_url: str

    # Only `message` is positional; everything after `*` must be passed by keyword.
    def __init__(
        self,
        message: str,
        *,
        file_path: str | None = None,
        error_type: str = 'unknown',
        line_number: int | None = None,
        column_number: int | None = None,
        field_name: str | None = None,
        doc_url: str = 'https://nims-mdpf.github.io/rdetoolkit/usage/config/config/',
    ) -> None: ...
|
rdetoolkit/invoicefile.py
CHANGED
|
@@ -160,16 +160,18 @@ def check_exist_rawfiles(dfexcelinvoice: pd.DataFrame, excel_rawfiles: list[Path
|
|
|
160
160
|
excel_rawfiles (list[Path]): A list of Path objects representing file paths.
|
|
161
161
|
|
|
162
162
|
Raises:
|
|
163
|
-
|
|
163
|
+
StructuredError: If any file name in dfexcelinvoice is not found in excel_rawfiles.
|
|
164
164
|
|
|
165
165
|
Returns:
|
|
166
166
|
list[Path]: A list of Path objects corresponding to the file names in dfexcelinvoice, ordered as they appear in the DataFrame.
|
|
167
167
|
"""
|
|
168
|
-
file_set_group = {f.name for f in excel_rawfiles}
|
|
169
|
-
file_set_invoice = set(dfexcelinvoice["data_file_names/name"])
|
|
170
|
-
if file_set_invoice - file_set_group:
|
|
171
|
-
|
|
172
|
-
|
|
168
|
+
file_set_group = {f.name for f in excel_rawfiles}
|
|
169
|
+
file_set_invoice = set(dfexcelinvoice["data_file_names/name"])
|
|
170
|
+
if file_set_invoice - file_set_group:
|
|
171
|
+
missing = sorted(file_set_invoice - file_set_group)
|
|
172
|
+
missing_display = (str(name) for name in missing)
|
|
173
|
+
emsg = f"ERROR: raw file not found: {', '.join(missing_display)}"
|
|
174
|
+
raise StructuredError(emsg)
|
|
173
175
|
# Sort excel_rawfiles in the order they appear in the invoice
|
|
174
176
|
_tmp = {f.name: f for f in excel_rawfiles}
|
|
175
177
|
try:
|
|
@@ -1152,7 +1154,8 @@ class RuleBasedReplacer:
|
|
|
1152
1154
|
Args:
|
|
1153
1155
|
replacements (Mapping[str, Any]): The object containing mapping rules (read-only).
|
|
1154
1156
|
source_json_obj (MutableMapping[str, Any] | None): Objects of key and value to which you want to apply the rule (performs nested assignments).
|
|
1155
|
-
mapping_rules (Mapping[str, str] | None, optional): Rules for mapping key and value (read-only).
|
|
1157
|
+
mapping_rules (Mapping[str, str] | None, optional): Rules for mapping key and value (read-only).
|
|
1158
|
+
If None, uses self.rules. Defaults to None.
|
|
1156
1159
|
|
|
1157
1160
|
Returns:
|
|
1158
1161
|
dict[str, Any]: dictionary type data after conversion
|
|
@@ -1173,13 +1176,12 @@ class RuleBasedReplacer:
|
|
|
1173
1176
|
result = replacer.apply_rules(replacement_rule, save_file_path, mapping_rules = rule)
|
|
1174
1177
|
print(result)
|
|
1175
1178
|
"""
|
|
1176
|
-
# [TODO] Correction of type definitions in version 0.1.6
|
|
1177
1179
|
if mapping_rules is None:
|
|
1178
1180
|
mapping_rules = self.rules
|
|
1179
1181
|
if source_json_obj is None:
|
|
1180
1182
|
source_json_obj = {}
|
|
1181
1183
|
|
|
1182
|
-
for key, value in
|
|
1184
|
+
for key, value in mapping_rules.items():
|
|
1183
1185
|
keys = key.split(".")
|
|
1184
1186
|
replace_value = replacements.get(value, "")
|
|
1185
1187
|
current_obj: MutableMapping[str, Any] = source_json_obj
|
rdetoolkit/models/metadata.py
CHANGED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, Final
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel, RootModel, field_validator
|
|
5
|
+
from pydantic import AnyUrl, BaseModel, ConfigDict, Field, RootModel, field_validator
|
|
6
6
|
|
|
7
7
|
MAX_VALUE_SIZE: Final[int] = 1024
|
|
8
8
|
|
|
@@ -71,7 +71,7 @@ class ValidableItems(RootModel):
|
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
class MetadataItem(BaseModel):
|
|
74
|
-
"""metadata
|
|
74
|
+
"""metadata.json class.
|
|
75
75
|
|
|
76
76
|
Stores metadata extracted by the data structuring process.
|
|
77
77
|
|
|
@@ -82,3 +82,98 @@ class MetadataItem(BaseModel):
|
|
|
82
82
|
|
|
83
83
|
constant: dict[str, MetaValue]
|
|
84
84
|
variable: ValidableItems
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class NameField(BaseModel):
    """Name of a metadata definition given in two languages.

    Both entries are required strings.

    Attributes:
        ja: The name written in Japanese.
        en: The name written in English.
    """

    ja: str
    en: str
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class SchemaField(BaseModel):
    """Type description of a metadata value inside a metadata definition.

    Both attributes are plain strings; the values listed below are the documented
    choices, and this model itself does not restrict the strings to them.

    Attributes:
        type: Kind of value: "array", "boolean", "integer", "number" or "string".
        format: Optional format specifier: "date-time" or "duration".
    """

    # Documented choices: "array", "boolean", "integer", "number", "string"
    type: str
    # Documented choices: "date-time", "duration"; omitted when not applicable
    format: str | None = None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class MetadataDefEntry(BaseModel):
    """One item definition inside metadata-def.json.

    Describes a single metadata key. This model belongs to metadata-def.json;
    metadata.json is modelled by MetadataItem instead.

    Attributes:
        name: Name in Japanese and English (both required).
        schema_field: Value type and optional format; read from and written to
            the JSON key "schema".
        unit: Unit of the metadata value, if any.
        description: Free-text description, if any.
        uri: URI/URL associated with the metadata key, if any.
        mode: Measurement mode, if any.
        order: Display order, if any.
        original_name: Original name, if any; read from and written to the JSON
            key "originalName".

    Example:
        ```json
        {
            "temperature": {
                "name": {"ja": "温度", "en": "Temperature"},
                "schema": {"type": "number"},
                "unit": "K"
            }
        }
        ```
    """

    model_config = ConfigDict(
        # Keys that are not declared here (e.g. "variable") are accepted rather than rejected
        extra="allow",
        # Fields may be populated through their Python name as well as their JSON alias
        populate_by_name=True,
    )

    name: NameField
    schema_field: SchemaField = Field(alias="schema")
    unit: str | None = None
    description: str | None = None
    uri: AnyUrl | None = None
    mode: str | None = None
    order: int | None = None
    original_name: str | None = Field(default=None, alias="originalName")
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class MetadataDefinition(RootModel):
    """Root model for a whole metadata-def.json document.

    The document is a single JSON object whose keys are metadata keys and whose
    values are the corresponding definitions (MetadataDefEntry). This model is
    for metadata-def.json only; metadata.json is modelled by MetadataItem.

    Example:
        ```json
        {
            "temperature": {
                "name": {"ja": "温度", "en": "Temperature"},
                "schema": {"type": "number"},
                "unit": "K"
            },
            "operator": {
                "name": {"ja": "測定者", "en": "Operator"},
                "schema": {"type": "string"}
            }
        }
        ```
    """

    # metadata key -> definition of that key
    root: dict[str, MetadataDefEntry]
|
rdetoolkit/models/metadata.pyi
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pydantic import BaseModel, RootModel
|
|
1
|
+
from pydantic import AnyUrl, BaseModel, RootModel
|
|
2
2
|
from typing import Any, Final
|
|
3
3
|
|
|
4
4
|
MAX_VALUE_SIZE: Final[int]
|
|
@@ -20,3 +20,24 @@ class ValidableItems(RootModel):
|
|
|
20
20
|
class MetadataItem(BaseModel):
|
|
21
21
|
constant: dict[str, MetaValue]
|
|
22
22
|
variable: ValidableItems
|
|
23
|
+
|
|
24
|
+
class NameField(BaseModel):
    # Type stub: a name carried in two required string fields.
    ja: str
    en: str
|
|
27
|
+
|
|
28
|
+
class SchemaField(BaseModel):
    # Type stub: `type` is a plain string; `format` may be None.
    type: str
    format: str | None
|
|
31
|
+
|
|
32
|
+
class MetadataDefEntry(BaseModel):
    # Type stub: `name` and `schema_field` are nested models; every other
    # field may be None.
    name: NameField
    schema_field: SchemaField
    unit: str | None
    description: str | None
    uri: AnyUrl | None
    mode: str | None
    order: int | None
    original_name: str | None
|
|
41
|
+
|
|
42
|
+
class MetadataDefinition(RootModel):
    # Type stub: the root value maps string keys to MetadataDefEntry objects.
    root: dict[str, MetadataDefEntry]
|
|
@@ -411,6 +411,7 @@ class SmartTableInvoiceInitializer(Processor):
|
|
|
411
411
|
"""Apply SmartTable row data to invoice and collect metadata updates."""
|
|
412
412
|
metadata_updates: dict[str, dict[str, Any]] = {}
|
|
413
413
|
metadata_def: dict[str, Any] | None = None
|
|
414
|
+
csv_has_sample_owner_id = False
|
|
414
415
|
|
|
415
416
|
# Handle empty CSV (no data rows)
|
|
416
417
|
if len(csv_data) == 0:
|
|
@@ -435,10 +436,50 @@ class SmartTableInvoiceInitializer(Processor):
|
|
|
435
436
|
meta_key, meta_entry = self._process_meta_mapping(col, value, metadata_def)
|
|
436
437
|
metadata_updates[meta_key] = meta_entry
|
|
437
438
|
continue
|
|
439
|
+
# Track if sample/ownerId is explicitly specified in CSV
|
|
440
|
+
if col == "sample/ownerId":
|
|
441
|
+
csv_has_sample_owner_id = True
|
|
438
442
|
self._process_mapping_key(col, value, invoice_data, invoice_schema_json_data)
|
|
439
443
|
|
|
444
|
+
# Set sample.ownerId to basic.dataOwnerId only if not specified in CSV
|
|
445
|
+
if not csv_has_sample_owner_id:
|
|
446
|
+
self._set_sample_owner_id(invoice_data)
|
|
447
|
+
|
|
440
448
|
return metadata_updates
|
|
441
449
|
|
|
450
|
+
def _set_sample_owner_id(self, invoice_data: dict[str, Any]) -> None:
    """Copy basic.dataOwnerId into sample.ownerId of the given invoice.

    Used during SmartTable processing so that a newly registered sample is owned
    by the data owner (registrant) instead of a temporary sample owner selected
    in the invoice screen.

    Args:
        invoice_data: Invoice data dictionary; updated in place.

    Note:
        - New sample registration: the owner ID written here is the one used.
        - Sample linking: the value is written but not used (safe to set).
        - basic.dataOwnerId missing or empty: a warning is logged and the
          invoice is left unchanged.
    """
    owner_id = invoice_data.get("basic", {}).get("dataOwnerId")
    owner_is_blank = owner_id is None or owner_id == ""

    if not owner_is_blank:
        # Create the "sample" section on demand before writing into it.
        invoice_data.setdefault("sample", {})["ownerId"] = owner_id
        logger.debug(
            "Set sample.ownerId to basic.dataOwnerId: %s",
            owner_id,
        )
        return

    logger.warning(
        "basic.dataOwnerId is missing or empty; sample.ownerId will not be updated. "
        "This may cause incorrect sample owner assignment.",
    )
|
|
482
|
+
|
|
442
483
|
def _load_metadata_definition(self, metadata_def_path: Path) -> dict[str, Any]:
|
|
443
484
|
"""Load metadata definitions for SmartTable meta column processing.
|
|
444
485
|
|
rdetoolkit/validation.py
CHANGED
|
@@ -29,7 +29,19 @@ def _pydantic_validation_error() -> type[PydanticValidationError]:
|
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class MetadataValidator:
|
|
32
|
+
"""Validator for metadata files (metadata.json).
|
|
33
|
+
|
|
34
|
+
This validator checks metadata.json files against the
|
|
35
|
+
MetadataItem Pydantic model, ensuring proper structure
|
|
36
|
+
for actual metadata data.
|
|
37
|
+
|
|
38
|
+
Note:
|
|
39
|
+
This is separate from MetadataDefinitionValidator which validates
|
|
40
|
+
metadata-def.json files (metadata definitions).
|
|
41
|
+
"""
|
|
42
|
+
|
|
32
43
|
def __init__(self) -> None:
|
|
44
|
+
"""Initialize metadata validator with schema."""
|
|
33
45
|
from rdetoolkit.models.metadata import MetadataItem
|
|
34
46
|
|
|
35
47
|
self.schema = MetadataItem
|
|
@@ -59,28 +71,102 @@ class MetadataValidator:
|
|
|
59
71
|
|
|
60
72
|
if path is not None:
|
|
61
73
|
__data = readf_json(path)
|
|
62
|
-
elif json_obj is not None:
|
|
63
|
-
__data = json_obj
|
|
64
74
|
else:
|
|
65
|
-
|
|
66
|
-
raise ValueError(emsg)
|
|
75
|
+
__data = json_obj
|
|
67
76
|
|
|
68
77
|
self.schema(**__data)
|
|
69
78
|
return __data
|
|
70
79
|
|
|
71
80
|
|
|
81
|
+
class MetadataDefinitionValidator:
|
|
82
|
+
"""Validator for metadata definition files (metadata-def.json).
|
|
83
|
+
|
|
84
|
+
This validator checks metadata-def.json files against the
|
|
85
|
+
MetadataDefinition Pydantic model, ensuring proper structure
|
|
86
|
+
for metadata definitions.
|
|
87
|
+
|
|
88
|
+
Note:
|
|
89
|
+
This is separate from MetadataValidator which validates
|
|
90
|
+
metadata.json files (actual metadata data).
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
def __init__(self) -> None:
|
|
94
|
+
"""Initialize metadata definition validator with schema."""
|
|
95
|
+
from rdetoolkit.models.metadata import MetadataDefinition
|
|
96
|
+
|
|
97
|
+
self.schema = MetadataDefinition
|
|
98
|
+
|
|
99
|
+
def validate(
|
|
100
|
+
self,
|
|
101
|
+
*,
|
|
102
|
+
path: str | Path | None = None,
|
|
103
|
+
json_obj: dict[str, Any] | None = None,
|
|
104
|
+
) -> dict[str, Any]:
|
|
105
|
+
"""Validate metadata definition JSON against schema.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
path: Path to metadata-def.json file to validate
|
|
109
|
+
json_obj: JSON object to validate (alternative to path)
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
Validated JSON data as dict
|
|
113
|
+
|
|
114
|
+
Raises:
|
|
115
|
+
ValueError: If neither path nor json_obj provided, or both provided
|
|
116
|
+
MetadataValidationError: If validation fails with detailed error info
|
|
117
|
+
|
|
118
|
+
Examples:
|
|
119
|
+
>>> validator = MetadataDefinitionValidator()
|
|
120
|
+
>>> data = validator.validate(path="metadata-def.json")
|
|
121
|
+
"""
|
|
122
|
+
# Input validation
|
|
123
|
+
if path is None and json_obj is None:
|
|
124
|
+
emsg = "At least one of 'path' or 'json_obj' must be provided"
|
|
125
|
+
raise ValueError(emsg)
|
|
126
|
+
if path is not None and json_obj is not None:
|
|
127
|
+
emsg = "Both 'path' and 'json_obj' cannot be provided at the same time"
|
|
128
|
+
raise ValueError(emsg)
|
|
129
|
+
|
|
130
|
+
# Load data
|
|
131
|
+
if path is not None:
|
|
132
|
+
__data = readf_json(path)
|
|
133
|
+
else:
|
|
134
|
+
__data = json_obj
|
|
135
|
+
|
|
136
|
+
# Validate with Pydantic model
|
|
137
|
+
try:
|
|
138
|
+
self.schema(__data)
|
|
139
|
+
except _pydantic_validation_error() as validation_error:
|
|
140
|
+
# Format error message for metadata-def.json
|
|
141
|
+
emsg = "Validation Errors in metadata-def.json. Please correct the following fields\n"
|
|
142
|
+
for idx, error in enumerate(validation_error.errors(), start=1):
|
|
143
|
+
# Extract field path (e.g., ['key', 'name', 'ja'])
|
|
144
|
+
field_path = ".".join([str(e) for e in error["loc"]])
|
|
145
|
+
emsg += f"{idx}. Field: {field_path}\n"
|
|
146
|
+
emsg += f" Type: {error['type']}\n"
|
|
147
|
+
emsg += f" Context: {error['msg']}\n"
|
|
148
|
+
raise MetadataValidationError(emsg) from validation_error
|
|
149
|
+
|
|
150
|
+
return __data
|
|
151
|
+
|
|
152
|
+
|
|
72
153
|
def metadata_validate(path: str | Path) -> None:
|
|
73
154
|
"""Validate metadata.json file.
|
|
74
155
|
|
|
75
|
-
This function validates the metadata
|
|
76
|
-
It checks if the file exists and then uses
|
|
156
|
+
This function validates the metadata.json file (actual metadata data)
|
|
157
|
+
specified by the given path. It checks if the file exists and then uses
|
|
158
|
+
MetadataValidator to validate the file against the MetadataItem schema.
|
|
159
|
+
|
|
160
|
+
Note:
|
|
161
|
+
This function is for metadata.json files. For metadata-def.json
|
|
162
|
+
files, use MetadataDefinitionValidator instead.
|
|
77
163
|
|
|
78
164
|
Args:
|
|
79
|
-
path (Union[str, Path]): The path to the metadata
|
|
165
|
+
path (Union[str, Path]): The path to the metadata.json file.
|
|
80
166
|
|
|
81
167
|
Raises:
|
|
82
168
|
FileNotFoundError: If the schema and path do not exist.
|
|
83
|
-
MetadataValidationError: If there is an error in validating the metadata
|
|
169
|
+
MetadataValidationError: If there is an error in validating the metadata file.
|
|
84
170
|
"""
|
|
85
171
|
if isinstance(path, str):
|
|
86
172
|
path = Path(path)
|
rdetoolkit/validation.pyi
CHANGED
|
@@ -8,6 +8,11 @@ class MetadataValidator:
|
|
|
8
8
|
def __init__(self) -> None: ...
|
|
9
9
|
def validate(self, *, path: str | Path | None = None, json_obj: dict[str, Any] | None = None) -> dict[str, Any]: ...
|
|
10
10
|
|
|
11
|
+
class MetadataDefinitionValidator:
    # Type stub: `schema` is left untyped (Incomplete) here.
    schema: Incomplete
    def __init__(self) -> None: ...
    # Both arguments are keyword-only and default to None.
    def validate(self, *, path: str | Path | None = None, json_obj: dict[str, Any] | None = None) -> dict[str, Any]: ...
|
|
15
|
+
|
|
11
16
|
def metadata_validate(path: str | Path) -> None: ...
|
|
12
17
|
|
|
13
18
|
class InvoiceValidator:
|
rdetoolkit/workflows.py
CHANGED
|
@@ -419,8 +419,14 @@ def run(*, custom_dataset_function: DatasetCallback | None = None, config: Confi
|
|
|
419
419
|
Exception: If a generic error occurs during the process.
|
|
420
420
|
|
|
421
421
|
Note:
|
|
422
|
-
|
|
423
|
-
|
|
422
|
+
Execution mode is selected in the following order:
|
|
423
|
+
|
|
424
|
+
1. SmartTable CSV is present (`smarttable_file` is not None) -> `SmartTableInvoice` mode.
|
|
425
|
+
2. Excel invoice bundle is provided (`excel_invoice_files` is not None) -> `Excelinvoice` mode.
|
|
426
|
+
3. `extended_mode` matches (case-insensitive) `rdeformat` or `MultiDataTile` -> the corresponding extended mode.
|
|
427
|
+
4. Otherwise -> `Invoice` mode.
|
|
428
|
+
|
|
429
|
+
The mode name recorded in logs/results matches the branch that executed. No `excelinvoice` value is accepted in `extended_mode`.
|
|
424
430
|
|
|
425
431
|
Example:
|
|
426
432
|
```python
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rdetoolkit
|
|
3
|
-
Version: 1.5.1
|
|
3
|
+
Version: 1.5.3
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Programming Language :: Python
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -36,6 +36,7 @@ Requires-Dist: pyarrow>=19.0.0
|
|
|
36
36
|
Requires-Dist: pip>=24.3.1
|
|
37
37
|
Requires-Dist: rpds-py>=0.26
|
|
38
38
|
Requires-Dist: markdown>=3.7
|
|
39
|
+
Requires-Dist: pytz>=2024.1
|
|
39
40
|
Requires-Dist: types-pytz>=2025.2.0.20250326
|
|
40
41
|
Requires-Dist: matplotlib>=3.9.4
|
|
41
42
|
Requires-Dist: minio>=7.2.15 ; extra == 'minio'
|
|
@@ -51,11 +52,11 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
51
52
|
Project-URL: Bug Tracker, https://github.com/nims-dpfc/rdetoolkit
|
|
52
53
|
Project-URL: Homepage, https://github.com/nims-dpfc/rdetoolkit
|
|
53
54
|
|
|
54
|
-

|
|
55
56
|
[](https://www.python.org/downloads/release/python-3917/)
|
|
56
|
-
[](https://github.com/nims-
|
|
57
|
-
[](https://github.com/nims-
|
|
58
|
-
](https://github.com/nims-mdpf/rdetoolkit/blob/main/LICENSE)
|
|
58
|
+
[](https://github.com/nims-mdpf/rdetoolkit/issues)
|
|
59
|
+

|
|
59
60
|

|
|
60
61
|
|
|
61
62
|
> [日本語ドキュメント](docs/README_ja.md)
|
|
@@ -76,6 +77,13 @@ If you wish to make changes, please read the following document first:
|
|
|
76
77
|
|
|
77
78
|
- [CONTRIBUTING.md](https://github.com/nims-mdpf/rdetoolkit/blob/main/CONTRIBUTING.md)
|
|
78
79
|
|
|
80
|
+
## Requirements
|
|
81
|
+
|
|
82
|
+
- **Python**: 3.9 or higher (Python 3.9 support will be removed in v2.0; upgrade to Python 3.10+ recommended)
|
|
83
|
+
|
|
84
|
+
!!! warning "Python 3.9 Deprecation"
|
|
85
|
+
Python 3.9 support is deprecated and will be removed in rdetoolkit v2.0. While Python 3.9 continues to work in rdetoolkit 1.x, users will see a `DeprecationWarning` on import. Please plan to upgrade to Python 3.10 or later before the v2.0 release.
|
|
86
|
+
|
|
79
87
|
## Install
|
|
80
88
|
|
|
81
89
|
To install, run the following command:
|
|
@@ -143,20 +151,20 @@ def dataset(paths: RdeDatasetPaths) -> None:
|
|
|
143
151
|
...
|
|
144
152
|
```
|
|
145
153
|
|
|
146
|
-
In this example, we define a dummy function `
|
|
154
|
+
In this example, we define a dummy function `display_message()` under `modules` to demonstrate how to implement custom structuring processing. Create a file named `modules/modules.py` as follows:
|
|
147
155
|
|
|
148
156
|
```python
|
|
149
157
|
# modules/modules.py
|
|
150
158
|
from rdetoolkit.models.rde2types import RdeDatasetPaths
|
|
151
159
|
|
|
152
160
|
|
|
153
|
-
def
|
|
161
|
+
def display_message(path):
|
|
154
162
|
print(f"Test Message!: {path}")
|
|
155
163
|
|
|
156
164
|
|
|
157
165
|
def dataset(paths: RdeDatasetPaths) -> None:
|
|
158
|
-
|
|
159
|
-
|
|
166
|
+
display_message(paths.inputdata)
|
|
167
|
+
display_message(paths.struct)
|
|
160
168
|
```
|
|
161
169
|
|
|
162
170
|
### About the Entry Point
|