esgvoc 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esgvoc/__init__.py +3 -0
- esgvoc/api/__init__.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
- esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
- esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
- esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
- esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
- esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
- esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
- esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
- esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
- esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
- esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
- esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
- esgvoc/api/data_descriptors/__init__.py +159 -0
- esgvoc/api/data_descriptors/activity.py +72 -0
- esgvoc/api/data_descriptors/archive.py +5 -0
- esgvoc/api/data_descriptors/area_label.py +30 -0
- esgvoc/api/data_descriptors/branded_suffix.py +30 -0
- esgvoc/api/data_descriptors/branded_variable.py +21 -0
- esgvoc/api/data_descriptors/citation_url.py +5 -0
- esgvoc/api/data_descriptors/contact.py +5 -0
- esgvoc/api/data_descriptors/conventions.py +28 -0
- esgvoc/api/data_descriptors/creation_date.py +18 -0
- esgvoc/api/data_descriptors/data_descriptor.py +127 -0
- esgvoc/api/data_descriptors/data_specs_version.py +25 -0
- esgvoc/api/data_descriptors/date.py +5 -0
- esgvoc/api/data_descriptors/directory_date.py +22 -0
- esgvoc/api/data_descriptors/drs_specs.py +38 -0
- esgvoc/api/data_descriptors/experiment.py +215 -0
- esgvoc/api/data_descriptors/forcing_index.py +21 -0
- esgvoc/api/data_descriptors/frequency.py +48 -0
- esgvoc/api/data_descriptors/further_info_url.py +5 -0
- esgvoc/api/data_descriptors/grid.py +43 -0
- esgvoc/api/data_descriptors/horizontal_label.py +20 -0
- esgvoc/api/data_descriptors/initialization_index.py +27 -0
- esgvoc/api/data_descriptors/institution.py +80 -0
- esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
- esgvoc/api/data_descriptors/license.py +31 -0
- esgvoc/api/data_descriptors/member_id.py +9 -0
- esgvoc/api/data_descriptors/mip_era.py +26 -0
- esgvoc/api/data_descriptors/model_component.py +32 -0
- esgvoc/api/data_descriptors/models_test/models.py +17 -0
- esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
- esgvoc/api/data_descriptors/obs_type.py +5 -0
- esgvoc/api/data_descriptors/organisation.py +22 -0
- esgvoc/api/data_descriptors/physics_index.py +21 -0
- esgvoc/api/data_descriptors/product.py +16 -0
- esgvoc/api/data_descriptors/publication_status.py +5 -0
- esgvoc/api/data_descriptors/realization_index.py +24 -0
- esgvoc/api/data_descriptors/realm.py +16 -0
- esgvoc/api/data_descriptors/regex.py +5 -0
- esgvoc/api/data_descriptors/region.py +35 -0
- esgvoc/api/data_descriptors/resolution.py +7 -0
- esgvoc/api/data_descriptors/source.py +120 -0
- esgvoc/api/data_descriptors/source_type.py +5 -0
- esgvoc/api/data_descriptors/sub_experiment.py +5 -0
- esgvoc/api/data_descriptors/table.py +28 -0
- esgvoc/api/data_descriptors/temporal_label.py +20 -0
- esgvoc/api/data_descriptors/time_range.py +17 -0
- esgvoc/api/data_descriptors/title.py +5 -0
- esgvoc/api/data_descriptors/tracking_id.py +67 -0
- esgvoc/api/data_descriptors/variable.py +56 -0
- esgvoc/api/data_descriptors/variant_label.py +25 -0
- esgvoc/api/data_descriptors/vertical_label.py +20 -0
- esgvoc/api/project_specs.py +143 -0
- esgvoc/api/projects.py +1253 -0
- esgvoc/api/py.typed +0 -0
- esgvoc/api/pydantic_handler.py +146 -0
- esgvoc/api/report.py +127 -0
- esgvoc/api/search.py +171 -0
- esgvoc/api/universe.py +434 -0
- esgvoc/apps/__init__.py +6 -0
- esgvoc/apps/cmor_tables/__init__.py +7 -0
- esgvoc/apps/cmor_tables/cvs_table.py +948 -0
- esgvoc/apps/drs/__init__.py +0 -0
- esgvoc/apps/drs/constants.py +2 -0
- esgvoc/apps/drs/generator.py +429 -0
- esgvoc/apps/drs/report.py +540 -0
- esgvoc/apps/drs/validator.py +312 -0
- esgvoc/apps/ga/__init__.py +104 -0
- esgvoc/apps/ga/example_usage.py +315 -0
- esgvoc/apps/ga/models/__init__.py +47 -0
- esgvoc/apps/ga/models/netcdf_header.py +306 -0
- esgvoc/apps/ga/models/validator.py +491 -0
- esgvoc/apps/ga/test_ga.py +161 -0
- esgvoc/apps/ga/validator.py +277 -0
- esgvoc/apps/jsg/json_schema_generator.py +341 -0
- esgvoc/apps/jsg/templates/template.jinja +241 -0
- esgvoc/apps/test_cv/README.md +214 -0
- esgvoc/apps/test_cv/__init__.py +0 -0
- esgvoc/apps/test_cv/cv_tester.py +1611 -0
- esgvoc/apps/test_cv/example_usage.py +216 -0
- esgvoc/apps/vr/__init__.py +12 -0
- esgvoc/apps/vr/build_variable_registry.py +71 -0
- esgvoc/apps/vr/example_usage.py +60 -0
- esgvoc/apps/vr/vr_app.py +333 -0
- esgvoc/cli/clean.py +304 -0
- esgvoc/cli/cmor.py +46 -0
- esgvoc/cli/config.py +1300 -0
- esgvoc/cli/drs.py +267 -0
- esgvoc/cli/find.py +138 -0
- esgvoc/cli/get.py +155 -0
- esgvoc/cli/install.py +41 -0
- esgvoc/cli/main.py +60 -0
- esgvoc/cli/offline.py +269 -0
- esgvoc/cli/status.py +79 -0
- esgvoc/cli/test_cv.py +258 -0
- esgvoc/cli/valid.py +147 -0
- esgvoc/core/constants.py +17 -0
- esgvoc/core/convert.py +0 -0
- esgvoc/core/data_handler.py +206 -0
- esgvoc/core/db/__init__.py +3 -0
- esgvoc/core/db/connection.py +40 -0
- esgvoc/core/db/models/mixins.py +25 -0
- esgvoc/core/db/models/project.py +102 -0
- esgvoc/core/db/models/universe.py +98 -0
- esgvoc/core/db/project_ingestion.py +231 -0
- esgvoc/core/db/universe_ingestion.py +172 -0
- esgvoc/core/exceptions.py +33 -0
- esgvoc/core/logging_handler.py +26 -0
- esgvoc/core/repo_fetcher.py +345 -0
- esgvoc/core/service/__init__.py +41 -0
- esgvoc/core/service/configuration/config_manager.py +196 -0
- esgvoc/core/service/configuration/setting.py +363 -0
- esgvoc/core/service/data_merger.py +634 -0
- esgvoc/core/service/esg_voc.py +77 -0
- esgvoc/core/service/resolver_config.py +56 -0
- esgvoc/core/service/state.py +324 -0
- esgvoc/core/service/string_heuristics.py +98 -0
- esgvoc/core/service/term_cache.py +108 -0
- esgvoc/core/service/uri_resolver.py +133 -0
- esgvoc-2.0.2.dist-info/METADATA +82 -0
- esgvoc-2.0.2.dist-info/RECORD +147 -0
- esgvoc-2.0.2.dist-info/WHEEL +4 -0
- esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
- esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
esgvoc/api/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING, Annotated, Any, Iterable, Type, Union, get_args, get_origin
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Discriminator, Tag, TypeAdapter
|
|
4
|
+
|
|
5
|
+
import esgvoc.core.constants as api_settings
|
|
6
|
+
from esgvoc.core.exceptions import EsgvocDbError
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
|
|
10
|
+
from esgvoc.core.db.models.project import PTerm
|
|
11
|
+
from esgvoc.core.db.models.universe import UTerm
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def create_union(*classes: Type[BaseModel]) -> Any:
    """
    Create a Union type with automatic property-based discrimination.

    The discriminator tags a dict input with the name of the first class whose
    required, non-nullable fields are all present among the dict keys. Any
    non-dict input is tagged with the name of its own class.

    Args:
        *classes: BaseModel classes to include in the union (order matters - most specific first)

    Returns:
        An Annotated Union type with a discriminator that checks required properties

    Raises:
        ValueError: Raised by the discriminator when a dict input matches none of the classes.
    """
    # Function-scope import: ``types`` is not among the module-level imports.
    import types

    classes_list = list(classes)

    def is_nullable(annotation: Any) -> bool:
        """Tell whether the annotation is a union that accepts None."""
        origin = get_origin(annotation)
        # Optional[X] reports typing.Union as its origin, whereas the PEP 604
        # form ``X | None`` reports types.UnionType on Python 3.10-3.13.
        if origin is Union or origin is types.UnionType:
            return type(None) in get_args(annotation)
        return False

    def required_fields_of(cls: Type[BaseModel]) -> set:
        """Collect the names of the fields that are required AND not nullable."""
        return {
            field_name
            for field_name, field_info in cls.model_fields.items()
            if field_info.is_required() and not is_nullable(field_info.annotation)
        }

    def property_discriminator(v: Any) -> str:
        """Generic discriminator that checks which class has matching required fields."""
        if not isinstance(v, dict):
            return v.__class__.__name__

        input_fields = set(v.keys())

        # Track which models failed and why, to build the error message.
        failed_matches = []

        # The first class whose required fields are all present wins.
        for cls in classes_list:
            missing_fields = required_fields_of(cls) - input_fields
            if not missing_fields:
                return cls.__name__
            failed_matches.append((cls.__name__, sorted(missing_fields)))

        # If no model matched, raise a helpful error
        error_parts = ["Could not discriminate union type. No model matched the input data."]
        error_parts.append(f"Input fields: {sorted(input_fields)}")
        error_parts.append("\nAttempted models:")
        for model_name, missing in failed_matches:
            error_parts.append(f" - {model_name}: missing required fields {missing}")

        raise ValueError("\n".join(error_parts))

    # Create annotated versions with tags
    tagged_classes = tuple(Annotated[cls, Tag(cls.__name__)] for cls in classes_list)

    # Subscript Union directly rather than calling its ``__getitem__`` dunder:
    # a tuple subscript is the documented way to build a union dynamically.
    union_type = Union[tagged_classes]

    return Annotated[union_type, Discriminator(property_discriminator)]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_pydantic_class(data_descriptor_id_or_term_type: str) -> type["DataDescriptor"]:
    """
    Look up the Pydantic class registered for a data descriptor ID or term type.

    Args:
        data_descriptor_id_or_term_type: The identifier of the data descriptor or term type

    Returns:
        The class found in DATA_DESCRIPTOR_CLASS_MAPPING for the given key

    Raises:
        EsgvocDbError: If the key is absent from DATA_DESCRIPTOR_CLASS_MAPPING
    """
    from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING

    # Guard clause: reject unknown keys first, then return the mapped class.
    if data_descriptor_id_or_term_type not in DATA_DESCRIPTOR_CLASS_MAPPING:
        raise EsgvocDbError(f"'{data_descriptor_id_or_term_type}' pydantic class not found")
    return DATA_DESCRIPTOR_CLASS_MAPPING[data_descriptor_id_or_term_type]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def instantiate_pydantic_term(term: "UTerm | PTerm", selected_term_fields: Iterable[str] | None) -> "DataDescriptor":
    """
    Build a Pydantic DataDescriptor from a database term.

    Args:
        term: The database term (UTerm or PTerm) to instantiate
        selected_term_fields: Names of the fields to copy onto a DataDescriptorSubSet.
            If None, the term specs are validated against the full model instead.

    Returns:
        A DataDescriptorSubSet when fields are selected, otherwise an instance of the
        class returned by get_pydantic_class for the term type.
    """
    from esgvoc.api.data_descriptors.data_descriptor import DataDescriptorSubSet

    term_type = term.specs[api_settings.TERM_TYPE_JSON_KEY]

    if selected_term_fields is None:
        # Full model: let pydantic validate the whole specs mapping.
        full_class = get_pydantic_class(term_type)
        return TypeAdapter(full_class).validate_python(term.specs)

    subset = DataDescriptorSubSet(id=term.id, type=term_type)
    declared_fields = DataDescriptorSubSet.model_fields

    def copy_field(name: str) -> None:
        """Set one attribute on the subset from the specs, or from the model default."""
        if name in declared_fields and name not in term.specs:
            # The field is declared on the model but absent from the specs:
            # use the declared default (the specs have nothing to offer here).
            # NOTE(review): for a field declared without a default this assigns
            # pydantic's "undefined" sentinel - confirm this is intended.
            value = declared_fields[name].default
        else:
            value = term.specs.get(name, None)
        setattr(subset, name, value)

    for name in selected_term_fields:
        copy_field(name)
    # The mandatory fields are always copied, after the selected ones.
    for name in DataDescriptorSubSet.MANDATORY_TERM_FIELDS:
        copy_field(name)
    return subset
|
esgvoc/api/report.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any, Protocol
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, computed_field
|
|
5
|
+
|
|
6
|
+
import esgvoc.core.constants as api_settings
|
|
7
|
+
from esgvoc.core.db.models.mixins import TermKind
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ValidationErrorVisitor(Protocol):
    """
    Structural interface of a visitor for term validation errors.
    """

    def visit_universe_term_error(self, error: "UniverseTermError") -> Any:
        """Handle a validation error raised against a universe term."""

    def visit_project_term_error(self, error: "ProjectTermError") -> Any:
        """Handle a validation error raised against a project term."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ValidationError(BaseModel, ABC):
    """
    Abstract base class of the term validation errors.
    """

    value: str
    """The value that failed the validation."""
    term: dict
    """The JSON specification of the term."""
    term_kind: TermKind
    """The kind of the term."""

    @computed_field  # type: ignore
    @property
    def class_name(self) -> str:
        """The name of the concrete error class, exposed for JSON serialization."""
        return type(self).__name__

    @abstractmethod
    def accept(self, visitor: ValidationErrorVisitor) -> Any:
        """
        Dispatch this error to a validation error visitor.

        :param visitor: The validation error visitor.
        :type visitor: ValidationErrorVisitor
        :return: Whatever the visitor returns.
        :rtype: Any
        """
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class UniverseTermError(ValidationError):
    """
    Validation error raised against a term of the universe.
    """

    data_descriptor_id: str
    """The id of the data descriptor that the term belongs to."""

    def accept(self, visitor: ValidationErrorVisitor) -> Any:
        """Dispatch to the visitor's universe term handler and return its result."""
        return visitor.visit_universe_term_error(self)

    def __str__(self) -> str:
        term_id = self.term[api_settings.TERM_ID_JSON_KEY]
        return (
            f"The term {term_id} from the data descriptor {self.data_descriptor_id} "
            f"does not validate the given value '{self.value}'"
        )

    def __repr__(self) -> str:
        # Same text as the human readable form.
        return str(self)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class ProjectTermError(ValidationError):
    """
    Validation error raised against a term of a project.
    """

    collection_id: str
    """The id of the collection that the term belongs to."""

    def accept(self, visitor: ValidationErrorVisitor) -> Any:
        """Dispatch to the visitor's project term handler and return its result."""
        return visitor.visit_project_term_error(self)

    def __str__(self) -> str:
        term_id = self.term[api_settings.TERM_ID_JSON_KEY]
        return (
            f"The term {term_id} from the collection {self.collection_id} "
            f"does not validate the given value '{self.value}'"
        )

    def __repr__(self) -> str:
        # Same text as the human readable form.
        return str(self)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ValidationReport(BaseModel):
    """
    Report of the validation of an expression against terms.
    """

    expression: str
    """The expression that was validated."""

    errors: list[UniverseTermError | ProjectTermError]
    """The validation errors found."""

    @computed_field  # type: ignore
    @property
    def nb_errors(self) -> int:
        """The number of validation errors."""
        return len(self.errors or ())

    @computed_field  # type: ignore
    @property
    def validated(self) -> bool:
        """True when there is no validation error."""
        return not self.errors

    def __len__(self) -> int:
        # The length of a report is its number of errors.
        return self.nb_errors

    def __bool__(self) -> bool:
        # A report is truthy when the expression is validated (no error).
        return self.validated

    def __str__(self) -> str:
        return f"'{self.expression}' has {self.nb_errors} error(s)"

    def __repr__(self) -> str:
        return str(self)
|
esgvoc/api/search.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import Any, Iterable, MutableSequence, Sequence
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sa
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
from sqlalchemy import ColumnElement
|
|
7
|
+
from sqlalchemy.exc import OperationalError
|
|
8
|
+
from sqlalchemy.sql.expression import Select
|
|
9
|
+
from sqlalchemy.sql.selectable import ExecutableReturnsRows
|
|
10
|
+
from sqlmodel import Column, Field, Session, col
|
|
11
|
+
|
|
12
|
+
import esgvoc.core.service as service
|
|
13
|
+
from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
|
|
14
|
+
from esgvoc.api.pydantic_handler import instantiate_pydantic_term
|
|
15
|
+
from esgvoc.core.db.models.project import PCollectionFTS5, PTerm, PTermFTS5
|
|
16
|
+
from esgvoc.core.db.models.universe import UDataDescriptorFTS5, UTerm, UTermFTS5
|
|
17
|
+
from esgvoc.core.exceptions import EsgvocDbError, EsgvocValueError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ItemKind(Enum):
    """The kinds of item that an Item can be: data descriptor, collection or term."""

    DATA_DESCRIPTOR = "data_descriptor"
    """Corresponds to a data descriptor"""
    COLLECTION = "collection"
    """Corresponds to a collection"""
    TERM = "term"
    """Corresponds to a term"""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Item(BaseModel):
    """An item from the universe or a project (data descriptor, collection or term)."""

    id: str
    """The id of the item."""
    # NOTE(review): Field and Column come from sqlmodel although Item derives from
    # the plain pydantic BaseModel - presumably sa_column has no effect here; confirm.
    kind: ItemKind = Field(sa_column=Column(sa.Enum(ItemKind)))
    """The kind of the item."""
    parent_id: str
    """The id of the parent of the item."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_universe_session() -> Session:
    """
    Open a session on the universe database of the current service state.

    :raises EsgvocDbError: If the universe database connection is not set.
    """
    connection = service.current_state.universe.db_connection
    if not connection:
        raise EsgvocDbError("universe connection is not initialized")
    return connection.create_session()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def instantiate_pydantic_terms(
    db_terms: Iterable[UTerm | PTerm],
    list_to_populate: MutableSequence[DataDescriptor],
    selected_term_fields: Iterable[str] | None,
) -> None:
    """
    Instantiate each database term and append the result to ``list_to_populate``.

    :param db_terms: The database terms to instantiate.
    :param list_to_populate: The sequence that receives the instantiated terms.
    :param selected_term_fields: Passed as is to instantiate_pydantic_term.
    :raises ValueError: If a term cannot be instantiated. The message identifies the
        term and the original exception is chained as the cause.
    """
    for db_term in db_terms:
        try:
            list_to_populate.append(instantiate_pydantic_term(db_term, selected_term_fields))
        except Exception as e:
            # Gather whatever context the failing term exposes for the error message.
            term_type = "N/A"
            if hasattr(db_term, "specs"):
                term_type = db_term.specs.get("type", "N/A")
            dd_id = "N/A"
            if hasattr(db_term, "data_descriptor") and db_term.data_descriptor:
                dd_id = db_term.data_descriptor.id
            message = (
                f"Failed to instantiate term with ID: '{db_term.id}', type: '{term_type}', "
                f"data_descriptor: '{dd_id}'. Original error: {e}"
            )
            raise ValueError(message) from e
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def process_expression(expression: str) -> str:
|
|
65
|
+
"""
|
|
66
|
+
Allows only SQLite FST operators AND OR NOT and perform prefix search for single word expressions.
|
|
67
|
+
"""
|
|
68
|
+
# 1. Remove single and double quotes.
|
|
69
|
+
result = expression.replace('"', "")
|
|
70
|
+
result = result.replace("'", "")
|
|
71
|
+
|
|
72
|
+
# 2. Escape keywords.
|
|
73
|
+
result = result.replace("NEAR", '"NEAR"')
|
|
74
|
+
result = result.replace("+", '"+"')
|
|
75
|
+
result = result.replace("-", '"-"')
|
|
76
|
+
result = result.replace(":", '":"')
|
|
77
|
+
result = result.replace("^", '"^"')
|
|
78
|
+
result = result.replace("(", '"("')
|
|
79
|
+
result = result.replace(")", '")"')
|
|
80
|
+
result = result.replace(",", '","')
|
|
81
|
+
|
|
82
|
+
# 3. Make single word request a prefix search.
|
|
83
|
+
if not result.endswith("*"):
|
|
84
|
+
tokens = result.split(sep=None)
|
|
85
|
+
if len(tokens) == 1:
|
|
86
|
+
result += "*"
|
|
87
|
+
return result
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def generate_matching_condition(
    cls: type[UTermFTS5] | type[UDataDescriptorFTS5] | type[PTermFTS5] | type[PCollectionFTS5],
    expression: str,
    only_id: bool,
) -> ColumnElement[bool]:
    """
    Build the MATCH condition of a full-text search on the given table class.

    The expression is first passed through process_expression. For the term
    classes (PTermFTS5, UTermFTS5) the match targets the ``specs`` column unless
    ``only_id`` is set; in every other case it targets the ``id`` column.
    """
    processed_expression = process_expression(expression)
    # TODO: fix this when specs will be available in collections and data descriptors.
    is_term_class = cls is PTermFTS5 or cls is UTermFTS5
    if is_term_class and not only_id:
        return col(cls.specs).match(processed_expression)  # type: ignore
    return col(cls.id).match(processed_expression)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def handle_rank_limit_offset(statement: Select, limit: int | None, offset: int | None) -> Select:
    """
    Order the statement by ``rank``, then apply the limit and the offset.

    A limit or an offset that is None, zero or negative is not applied.
    """
    ranked = statement.order_by(sa.text("rank"))
    if limit is not None and limit > 0:
        ranked = ranked.limit(limit)
    if offset is not None and offset > 0:
        ranked = ranked.offset(offset)
    return ranked
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def execute_match_statement(expression: str, statement: ExecutableReturnsRows, session: Session) -> Sequence:
    """
    Execute a match statement and return the first column of every row.

    :param expression: The expression the statement was built from (used in the error message).
    :param statement: The statement to execute.
    :param session: The session to execute the statement with.
    :raises EsgvocValueError: If the execution raises an OperationalError.
    """
    try:
        # all() returns a list of sqlalchemy rows; keep their first element only.
        return [row[0] for row in session.exec(statement).all()]  # type: ignore
    except OperationalError as e:
        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def execute_find_item_statements(
    session: Session,
    expression: str,
    first_statement: Select,
    second_statement: Select,
    limit: int | None,
    offset: int | None,
) -> list[Item]:
    """
    Execute two find statements and merge their rows into a single ranked list of items.

    The rows of both statements are sorted together on their rank (index 3),
    then the offset and the limit are applied to the merged list. A limit or an
    offset that is None, zero or negative is not applied.

    :raises EsgvocValueError: If the execution raises an OperationalError.
    """
    try:
        # Each row holds an id, a kind, a parent id and a rank.
        rows: list[Any] = [
            *session.exec(first_statement).all(),  # type: ignore
            *session.exec(second_statement).all(),  # type: ignore
        ]
        # According to https://sqlite.org/fts5.html#the_bm25_function,
        # "the better matches are assigned numerically lower scores."
        rows.sort(key=lambda row: row[3])
        start = offset if offset is not None and offset > 0 else 0
        # A stop of None keeps everything from start; a stop beyond the end is fine.
        stop = start + limit if limit is not None and limit > 0 else None
        result = [Item(id=row[0], kind=row[1], parent_id=row[2]) for row in rows[start:stop]]
    except OperationalError as e:
        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
    return result
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class MatchingTerm(BaseModel):
    """
    Placeholder for a term that matches a value (term validation).

    It only carries identifiers: the project, the collection and the term.
    """

    project_id: str
    """The project id to which the term belongs."""
    collection_id: str
    """The collection id to which the term belongs."""
    term_id: str
    """The term id."""
|