esgvoc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. esgvoc/__init__.py +3 -0
  2. esgvoc/api/__init__.py +91 -0
  3. esgvoc/api/data_descriptors/EMD_models/__init__.py +66 -0
  4. esgvoc/api/data_descriptors/EMD_models/arrangement.py +21 -0
  5. esgvoc/api/data_descriptors/EMD_models/calendar.py +5 -0
  6. esgvoc/api/data_descriptors/EMD_models/cell_variable_type.py +20 -0
  7. esgvoc/api/data_descriptors/EMD_models/component_type.py +5 -0
  8. esgvoc/api/data_descriptors/EMD_models/coordinate.py +52 -0
  9. esgvoc/api/data_descriptors/EMD_models/grid_mapping.py +19 -0
  10. esgvoc/api/data_descriptors/EMD_models/grid_region.py +19 -0
  11. esgvoc/api/data_descriptors/EMD_models/grid_type.py +19 -0
  12. esgvoc/api/data_descriptors/EMD_models/horizontal_computational_grid.py +56 -0
  13. esgvoc/api/data_descriptors/EMD_models/horizontal_grid_cells.py +230 -0
  14. esgvoc/api/data_descriptors/EMD_models/horizontal_subgrid.py +41 -0
  15. esgvoc/api/data_descriptors/EMD_models/horizontal_units.py +5 -0
  16. esgvoc/api/data_descriptors/EMD_models/model.py +139 -0
  17. esgvoc/api/data_descriptors/EMD_models/model_component.py +115 -0
  18. esgvoc/api/data_descriptors/EMD_models/reference.py +61 -0
  19. esgvoc/api/data_descriptors/EMD_models/resolution.py +48 -0
  20. esgvoc/api/data_descriptors/EMD_models/temporal_refinement.py +19 -0
  21. esgvoc/api/data_descriptors/EMD_models/truncation_method.py +17 -0
  22. esgvoc/api/data_descriptors/EMD_models/vertical_computational_grid.py +91 -0
  23. esgvoc/api/data_descriptors/EMD_models/vertical_coordinate.py +5 -0
  24. esgvoc/api/data_descriptors/EMD_models/vertical_units.py +19 -0
  25. esgvoc/api/data_descriptors/__init__.py +159 -0
  26. esgvoc/api/data_descriptors/activity.py +72 -0
  27. esgvoc/api/data_descriptors/archive.py +5 -0
  28. esgvoc/api/data_descriptors/area_label.py +30 -0
  29. esgvoc/api/data_descriptors/branded_suffix.py +30 -0
  30. esgvoc/api/data_descriptors/branded_variable.py +21 -0
  31. esgvoc/api/data_descriptors/citation_url.py +5 -0
  32. esgvoc/api/data_descriptors/contact.py +5 -0
  33. esgvoc/api/data_descriptors/conventions.py +28 -0
  34. esgvoc/api/data_descriptors/creation_date.py +18 -0
  35. esgvoc/api/data_descriptors/data_descriptor.py +127 -0
  36. esgvoc/api/data_descriptors/data_specs_version.py +25 -0
  37. esgvoc/api/data_descriptors/date.py +5 -0
  38. esgvoc/api/data_descriptors/directory_date.py +22 -0
  39. esgvoc/api/data_descriptors/drs_specs.py +38 -0
  40. esgvoc/api/data_descriptors/experiment.py +215 -0
  41. esgvoc/api/data_descriptors/forcing_index.py +21 -0
  42. esgvoc/api/data_descriptors/frequency.py +48 -0
  43. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  44. esgvoc/api/data_descriptors/grid.py +43 -0
  45. esgvoc/api/data_descriptors/horizontal_label.py +20 -0
  46. esgvoc/api/data_descriptors/initialization_index.py +27 -0
  47. esgvoc/api/data_descriptors/institution.py +80 -0
  48. esgvoc/api/data_descriptors/known_branded_variable.py +75 -0
  49. esgvoc/api/data_descriptors/license.py +31 -0
  50. esgvoc/api/data_descriptors/member_id.py +9 -0
  51. esgvoc/api/data_descriptors/mip_era.py +26 -0
  52. esgvoc/api/data_descriptors/model_component.py +32 -0
  53. esgvoc/api/data_descriptors/models_test/models.py +17 -0
  54. esgvoc/api/data_descriptors/nominal_resolution.py +50 -0
  55. esgvoc/api/data_descriptors/obs_type.py +5 -0
  56. esgvoc/api/data_descriptors/organisation.py +22 -0
  57. esgvoc/api/data_descriptors/physics_index.py +21 -0
  58. esgvoc/api/data_descriptors/product.py +16 -0
  59. esgvoc/api/data_descriptors/publication_status.py +5 -0
  60. esgvoc/api/data_descriptors/realization_index.py +24 -0
  61. esgvoc/api/data_descriptors/realm.py +16 -0
  62. esgvoc/api/data_descriptors/regex.py +5 -0
  63. esgvoc/api/data_descriptors/region.py +35 -0
  64. esgvoc/api/data_descriptors/resolution.py +7 -0
  65. esgvoc/api/data_descriptors/source.py +120 -0
  66. esgvoc/api/data_descriptors/source_type.py +5 -0
  67. esgvoc/api/data_descriptors/sub_experiment.py +5 -0
  68. esgvoc/api/data_descriptors/table.py +28 -0
  69. esgvoc/api/data_descriptors/temporal_label.py +20 -0
  70. esgvoc/api/data_descriptors/time_range.py +17 -0
  71. esgvoc/api/data_descriptors/title.py +5 -0
  72. esgvoc/api/data_descriptors/tracking_id.py +67 -0
  73. esgvoc/api/data_descriptors/variable.py +56 -0
  74. esgvoc/api/data_descriptors/variant_label.py +25 -0
  75. esgvoc/api/data_descriptors/vertical_label.py +20 -0
  76. esgvoc/api/project_specs.py +143 -0
  77. esgvoc/api/projects.py +1253 -0
  78. esgvoc/api/py.typed +0 -0
  79. esgvoc/api/pydantic_handler.py +146 -0
  80. esgvoc/api/report.py +127 -0
  81. esgvoc/api/search.py +171 -0
  82. esgvoc/api/universe.py +434 -0
  83. esgvoc/apps/__init__.py +6 -0
  84. esgvoc/apps/cmor_tables/__init__.py +7 -0
  85. esgvoc/apps/cmor_tables/cvs_table.py +948 -0
  86. esgvoc/apps/drs/__init__.py +0 -0
  87. esgvoc/apps/drs/constants.py +2 -0
  88. esgvoc/apps/drs/generator.py +429 -0
  89. esgvoc/apps/drs/report.py +540 -0
  90. esgvoc/apps/drs/validator.py +312 -0
  91. esgvoc/apps/ga/__init__.py +104 -0
  92. esgvoc/apps/ga/example_usage.py +315 -0
  93. esgvoc/apps/ga/models/__init__.py +47 -0
  94. esgvoc/apps/ga/models/netcdf_header.py +306 -0
  95. esgvoc/apps/ga/models/validator.py +491 -0
  96. esgvoc/apps/ga/test_ga.py +161 -0
  97. esgvoc/apps/ga/validator.py +277 -0
  98. esgvoc/apps/jsg/json_schema_generator.py +341 -0
  99. esgvoc/apps/jsg/templates/template.jinja +241 -0
  100. esgvoc/apps/test_cv/README.md +214 -0
  101. esgvoc/apps/test_cv/__init__.py +0 -0
  102. esgvoc/apps/test_cv/cv_tester.py +1611 -0
  103. esgvoc/apps/test_cv/example_usage.py +216 -0
  104. esgvoc/apps/vr/__init__.py +12 -0
  105. esgvoc/apps/vr/build_variable_registry.py +71 -0
  106. esgvoc/apps/vr/example_usage.py +60 -0
  107. esgvoc/apps/vr/vr_app.py +333 -0
  108. esgvoc/cli/clean.py +304 -0
  109. esgvoc/cli/cmor.py +46 -0
  110. esgvoc/cli/config.py +1300 -0
  111. esgvoc/cli/drs.py +267 -0
  112. esgvoc/cli/find.py +138 -0
  113. esgvoc/cli/get.py +155 -0
  114. esgvoc/cli/install.py +41 -0
  115. esgvoc/cli/main.py +60 -0
  116. esgvoc/cli/offline.py +269 -0
  117. esgvoc/cli/status.py +79 -0
  118. esgvoc/cli/test_cv.py +258 -0
  119. esgvoc/cli/valid.py +147 -0
  120. esgvoc/core/constants.py +17 -0
  121. esgvoc/core/convert.py +0 -0
  122. esgvoc/core/data_handler.py +206 -0
  123. esgvoc/core/db/__init__.py +3 -0
  124. esgvoc/core/db/connection.py +40 -0
  125. esgvoc/core/db/models/mixins.py +25 -0
  126. esgvoc/core/db/models/project.py +102 -0
  127. esgvoc/core/db/models/universe.py +98 -0
  128. esgvoc/core/db/project_ingestion.py +231 -0
  129. esgvoc/core/db/universe_ingestion.py +172 -0
  130. esgvoc/core/exceptions.py +33 -0
  131. esgvoc/core/logging_handler.py +26 -0
  132. esgvoc/core/repo_fetcher.py +345 -0
  133. esgvoc/core/service/__init__.py +41 -0
  134. esgvoc/core/service/configuration/config_manager.py +196 -0
  135. esgvoc/core/service/configuration/setting.py +363 -0
  136. esgvoc/core/service/data_merger.py +634 -0
  137. esgvoc/core/service/esg_voc.py +77 -0
  138. esgvoc/core/service/resolver_config.py +56 -0
  139. esgvoc/core/service/state.py +324 -0
  140. esgvoc/core/service/string_heuristics.py +98 -0
  141. esgvoc/core/service/term_cache.py +108 -0
  142. esgvoc/core/service/uri_resolver.py +133 -0
  143. esgvoc-2.0.2.dist-info/METADATA +82 -0
  144. esgvoc-2.0.2.dist-info/RECORD +147 -0
  145. esgvoc-2.0.2.dist-info/WHEEL +4 -0
  146. esgvoc-2.0.2.dist-info/entry_points.txt +2 -0
  147. esgvoc-2.0.2.dist-info/licenses/LICENSE.txt +519 -0
esgvoc/api/py.typed ADDED
File without changes
@@ -0,0 +1,146 @@
1
+ from typing import TYPE_CHECKING, Annotated, Any, Iterable, Type, Union, get_args, get_origin
2
+
3
+ from pydantic import BaseModel, Discriminator, Tag, TypeAdapter
4
+
5
+ import esgvoc.core.constants as api_settings
6
+ from esgvoc.core.exceptions import EsgvocDbError
7
+
8
+ if TYPE_CHECKING:
9
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
10
+ from esgvoc.core.db.models.project import PTerm
11
+ from esgvoc.core.db.models.universe import UTerm
12
+
13
+
14
def create_union(*classes: Type[BaseModel]):
    """
    Create a Union type with automatic property-based discrimination.

    Args:
        *classes: BaseModel classes to include in the union (order matters - most specific first)

    Returns:
        An Annotated Union type with a discriminator that checks required properties.
        When the validated input is a dict, the discriminator selects the first class whose
        required, non-nullable fields are all present in the input and raises a ValueError
        listing the missing fields of every class when none matches. For any other input,
        the name of the input's own class is used as the tag.
    """
    # Imported locally so that this block does not depend on a new file-level import.
    import types

    classes_list = list(classes)

    def is_nullable(annotation: Any) -> bool:
        """Tell whether the annotation is a union that accepts None."""
        origin = get_origin(annotation)
        # Optional[X] / Union[X, None] have typing.Union as origin, whereas the
        # `X | None` syntax produces a types.UnionType: both must be recognised.
        if origin is Union or origin is types.UnionType:
            return type(None) in get_args(annotation)
        return False

    def required_non_nullable_fields(cls: Type[BaseModel]) -> set[str]:
        """Collect the names of the fields that are required and do not accept None."""
        return {
            field_name
            for field_name, field_info in cls.model_fields.items()
            if field_info.is_required() and not is_nullable(field_info.annotation)
        }

    def property_discriminator(v: Any) -> str:
        """Generic discriminator that checks which class has matching required fields."""
        if not isinstance(v, dict):
            return v.__class__.__name__

        input_fields = set(v.keys())

        # Track which models failed and why, for the error message.
        failed_matches = []

        # The first class (in the given order) whose required fields are all present wins.
        for cls in classes_list:
            missing_fields = required_non_nullable_fields(cls) - input_fields
            if not missing_fields:
                return cls.__name__
            failed_matches.append((cls.__name__, sorted(missing_fields)))

        # If no model matched, raise a helpful error.
        error_parts = ["Could not discriminate union type. No model matched the input data."]
        error_parts.append(f"Input fields: {sorted(input_fields)}")
        error_parts.append("\nAttempted models:")
        for model_name, missing in failed_matches:
            error_parts.append(f" - {model_name}: missing required fields {missing}")

        raise ValueError("\n".join(error_parts))

    # Tag every class with its own name: this is the value returned by the discriminator.
    tagged_classes = tuple(Annotated[cls, Tag(cls.__name__)] for cls in classes_list)

    # Subscripting Union with a tuple is the same as listing the members one by one.
    union_type = Union[tagged_classes]

    return Annotated[union_type, Discriminator(property_discriminator)]
83
+
84
+
85
def get_pydantic_class(data_descriptor_id_or_term_type: str) -> type["DataDescriptor"]:
    """
    Look up the Pydantic class registered for a data descriptor ID or a term type.

    Args:
        data_descriptor_id_or_term_type: The identifier of the data descriptor or term type

    Returns:
        The class found in DATA_DESCRIPTOR_CLASS_MAPPING for the given key

    Raises:
        EsgvocDbError: If the key is not in DATA_DESCRIPTOR_CLASS_MAPPING
    """
    from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING

    # Guard clause: unknown keys are reported first, the nominal case follows.
    if data_descriptor_id_or_term_type not in DATA_DESCRIPTOR_CLASS_MAPPING:
        raise EsgvocDbError(f"'{data_descriptor_id_or_term_type}' pydantic class not found")
    return DATA_DESCRIPTOR_CLASS_MAPPING[data_descriptor_id_or_term_type]
104
+
105
+
106
def instantiate_pydantic_term(term: "UTerm | PTerm", selected_term_fields: Iterable[str] | None) -> "DataDescriptor":
    """
    Instantiate a Pydantic DataDescriptor from a database term.

    Args:
        term: The database term (UTerm or PTerm) to instantiate
        selected_term_fields: Optional list of specific fields to include. If None, all fields are included.

    Returns:
        A DataDescriptorSubSet carrying the selected and the mandatory fields when
        selected_term_fields is given, otherwise the term specs validated against the
        class returned by get_pydantic_class for the term type.
    """
    from esgvoc.api.data_descriptors.data_descriptor import DataDescriptorSubSet

    term_type = term.specs[api_settings.TERM_TYPE_JSON_KEY]

    if selected_term_fields is None:
        # Full model: let pydantic validate the whole specs.
        adapter = TypeAdapter(get_pydantic_class(term_type))
        return adapter.validate_python(term.specs)

    subset = DataDescriptorSubSet(id=term.id, type=term_type)

    # Model field defaults are used when fields are missing from term.specs.
    model_fields = DataDescriptorSubSet.model_fields

    # The mandatory fields are processed last, exactly like the selected ones.
    for fields in (selected_term_fields, DataDescriptorSubSet.MANDATORY_TERM_FIELDS):
        for field in fields:
            if field in model_fields and field not in term.specs:
                # NOTE(review): for a field declared without a default, `.default` is presumably
                # pydantic's "undefined" sentinel rather than None - confirm this cannot happen
                # for the fields requested here.
                value = model_fields[field].default
            else:
                # Value from the specs, or None when the field is unknown everywhere.
                value = term.specs.get(field, None)
            setattr(subset, field, value)
    return subset
esgvoc/api/report.py ADDED
@@ -0,0 +1,127 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Protocol
3
+
4
+ from pydantic import BaseModel, computed_field
5
+
6
+ import esgvoc.core.constants as api_settings
7
+ from esgvoc.core.db.models.mixins import TermKind
8
+
9
+
10
class ValidationErrorVisitor(Protocol):
    """
    Protocol to be satisfied by the visitors of term validation errors.
    """

    def visit_universe_term_error(self, error: "UniverseTermError") -> Any:
        """Handle a validation error on a term of the universe."""
        ...

    def visit_project_term_error(self, error: "ProjectTermError") -> Any:
        """Handle a validation error on a term of a project."""
        ...
21
+
22
+
23
class ValidationError(BaseModel, ABC):
    """
    Generic class for the term validation error.
    """

    # The class and computed field docstrings are kept verbatim: pydantic may expose them
    # as descriptions in the generated JSON schema.
    value: str
    """The given value that is invalid."""
    term: dict
    """JSON specification of the term."""
    term_kind: TermKind
    """The kind of term."""

    @computed_field  # type: ignore
    @property
    def class_name(self) -> str:
        """The class name of the issue for JSON serialization."""
        return type(self).__name__

    @abstractmethod
    def accept(self, visitor: ValidationErrorVisitor) -> Any:
        """
        Let a validation error visitor process this error.

        :param visitor: The validation error visitor.
        :type visitor: ValidationErrorVisitor
        :return: Depending on the visitor.
        :rtype: Any
        """
50
+
51
+
52
class UniverseTermError(ValidationError):
    """
    A validation error on a term from the universe.
    """

    data_descriptor_id: str
    """The data descriptor that the term belongs."""

    def accept(self, visitor: ValidationErrorVisitor) -> Any:
        # Double dispatch: hand this error to the universe hook of the visitor.
        return visitor.visit_universe_term_error(self)

    def __str__(self) -> str:
        term_id = self.term[api_settings.TERM_ID_JSON_KEY]
        return (
            f"The term {term_id} from the data descriptor {self.data_descriptor_id} "
            f"does not validate the given value '{self.value}'"
        )

    def __repr__(self) -> str:
        # The representation is the human readable message.
        return str(self)
71
+
72
+
73
class ProjectTermError(ValidationError):
    """
    A validation error on a term from a project.
    """

    collection_id: str
    """The collection id that the term belongs"""

    def accept(self, visitor: ValidationErrorVisitor) -> Any:
        # Double dispatch: hand this error to the project hook of the visitor.
        return visitor.visit_project_term_error(self)

    def __str__(self) -> str:
        term_id = self.term[api_settings.TERM_ID_JSON_KEY]
        return (
            f"The term {term_id} from the collection {self.collection_id} "
            f"does not validate the given value '{self.value}'"
        )

    def __repr__(self) -> str:
        # The representation is the human readable message.
        return str(self)
92
+
93
+
94
class ValidationReport(BaseModel):
    """
    Term validation report.
    """

    expression: str
    """The given expression."""

    errors: list[UniverseTermError | ProjectTermError]
    """The validation errors."""

    @computed_field  # type: ignore
    @property
    def nb_errors(self) -> int:
        """The number of validation errors."""
        if not self.errors:
            return 0
        return len(self.errors)

    @computed_field  # type: ignore
    @property
    def validated(self) -> bool:
        """The expression is validated or not."""
        return not self.errors

    def __len__(self) -> int:
        # The length of a report is its number of errors.
        return self.nb_errors

    def __bool__(self) -> bool:
        # A report is truthy when the expression is validated (no error).
        return self.validated

    def __str__(self) -> str:
        return f"'{self.expression}' has {self.nb_errors} error(s)"

    def __repr__(self) -> str:
        return str(self)
esgvoc/api/search.py ADDED
@@ -0,0 +1,171 @@
1
+ from enum import Enum
2
+ from typing import Any, Iterable, MutableSequence, Sequence
3
+
4
+ import sqlalchemy as sa
5
+ from pydantic import BaseModel
6
+ from sqlalchemy import ColumnElement
7
+ from sqlalchemy.exc import OperationalError
8
+ from sqlalchemy.sql.expression import Select
9
+ from sqlalchemy.sql.selectable import ExecutableReturnsRows
10
+ from sqlmodel import Column, Field, Session, col
11
+
12
+ import esgvoc.core.service as service
13
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor
14
+ from esgvoc.api.pydantic_handler import instantiate_pydantic_term
15
+ from esgvoc.core.db.models.project import PCollectionFTS5, PTerm, PTermFTS5
16
+ from esgvoc.core.db.models.universe import UDataDescriptorFTS5, UTerm, UTermFTS5
17
+ from esgvoc.core.exceptions import EsgvocDbError, EsgvocValueError
18
+
19
+
20
class ItemKind(Enum):
    """The kind of an item: data descriptor, collection or term."""

    DATA_DESCRIPTOR = "data_descriptor"
    """Corresponds to a data descriptor"""
    COLLECTION = "collection"
    """Corresponds to a collection"""
    TERM = "term"
    """Corresponds to a term"""
27
+
28
+
29
class Item(BaseModel):
    """An item from the universe or a project (data descriptor, collection or term)."""

    id: str
    """The id of the item."""
    # NOTE(review): `Field` and `Column` are imported from sqlmodel whereas `Item` derives from
    # the plain pydantic BaseModel; the `sa_column` is presumably unused here - confirm before
    # simplifying this declaration.
    kind: ItemKind = Field(sa_column=Column(sa.Enum(ItemKind)))
    """The kind of the item."""
    parent_id: str
    """The id of the parent of the item."""
38
+
39
+
40
def get_universe_session() -> Session:
    """
    Create a session from the universe database connection of the current state.

    :raises EsgvocDbError: If the universe connection is not set.
    """
    connection = service.current_state.universe.db_connection
    if not connection:
        raise EsgvocDbError("universe connection is not initialized")
    return connection.create_session()
46
+
47
+
48
def instantiate_pydantic_terms(
    db_terms: Iterable[UTerm | PTerm],
    list_to_populate: MutableSequence[DataDescriptor],
    selected_term_fields: Iterable[str] | None,
) -> None:
    """
    Instantiate every database term and append the result to the given list.

    Any failure is re-raised as a ValueError that names the faulty term (id, type and
    data descriptor when available) and chains the original exception.
    """
    for db_term in db_terms:
        try:
            list_to_populate.append(instantiate_pydantic_term(db_term, selected_term_fields))
        except Exception as e:
            # Gather as much context as the term provides about itself.
            if hasattr(db_term, 'specs'):
                term_type = db_term.specs.get('type', 'N/A')
            else:
                term_type = 'N/A'
            dd_id = 'N/A'
            if hasattr(db_term, 'data_descriptor'):
                data_descriptor = db_term.data_descriptor
                if data_descriptor:
                    dd_id = data_descriptor.id
            raise ValueError(f"Failed to instantiate term with ID: '{db_term.id}', type: '{term_type}', data_descriptor: '{dd_id}'. Original error: {e}") from e
62
+
63
+
64
def process_expression(expression: str) -> str:
    """
    Allows only SQLite FTS operators AND OR NOT and perform prefix search for single word expressions.

    Quotes are removed, the other FTS special characters and the standalone word NEAR are
    wrapped in double quotes, and a `*` is appended to an expression made of a single token.

    :param expression: The expression given by the user.
    :type expression: str
    :return: The expression to be passed to the FTS match.
    :rtype: str
    """
    # Imported locally so that this block does not depend on a new file-level import.
    import re

    # 1. Remove single and double quotes and 2. escape the special characters, in one pass.
    # Quotes are mapped to None (deleted), every other character to its quoted form.
    escaping_table = {'"': None, "'": None}
    escaping_table.update({char: f'"{char}"' for char in "+-:^(),"})
    result = expression.translate(str.maketrans(escaping_table))

    # Escape the NEAR keyword only when it stands as a word of its own:
    # a word that merely contains it (e.g. LINEAR) must not be split by quotes.
    result = re.sub(r"\bNEAR\b", '"NEAR"', result)

    # 3. Make single word request a prefix search.
    if not result.endswith("*"):
        tokens = result.split(sep=None)
        if len(tokens) == 1:
            result += "*"
    return result
88
+
89
+
90
def generate_matching_condition(
    cls: type[UTermFTS5] | type[UDataDescriptorFTS5] | type[PTermFTS5] | type[PCollectionFTS5],
    expression: str,
    only_id: bool,
) -> ColumnElement[bool]:
    """
    Build the match condition of the processed expression against a column of the given class.

    The `specs` column is matched for the term classes when only_id is False,
    the `id` column is matched in every other case.
    """
    processed_expression = process_expression(expression)
    # TODO: fix this when specs will be available in collections and Data descriptors.
    is_term_cls = cls is PTermFTS5 or cls is UTermFTS5
    if is_term_cls and not only_id:
        return col(cls.specs).match(processed_expression)  # type: ignore
    return col(cls.id).match(processed_expression)
105
+
106
+
107
def handle_rank_limit_offset(statement: Select, limit: int | None, offset: int | None) -> Select:
    """
    Order the statement by rank, then apply the limit and the offset when they are positive.

    A limit or an offset that is None, zero or negative is ignored.
    """
    ranked = statement.order_by(sa.text("rank"))
    if limit is not None and limit > 0:
        ranked = ranked.limit(limit)
    if offset is not None and offset > 0:
        ranked = ranked.offset(offset)
    return ranked
114
+
115
+
116
def execute_match_statement(expression: str, statement: ExecutableReturnsRows, session: Session) -> Sequence:
    """
    Execute the statement and return the first column of every row.

    :raises EsgvocValueError: If the execution raises an OperationalError.
    """
    try:
        # .all() returns a list of sqlalchemy rows: keep their first element only.
        return [row[0] for row in session.exec(statement).all()]  # type: ignore
    except OperationalError as e:
        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
124
+
125
+
126
def execute_find_item_statements(
    session: Session,
    expression: str,
    first_statement: Select,
    second_statement: Select,
    limit: int | None,
    offset: int | None,
) -> list[Item]:
    """
    Execute both statements, merge their rows by rank and return the requested frame as items.

    A limit or an offset that is None, zero or negative is ignored.

    :raises EsgvocValueError: If the execution raises an OperationalError.
    """
    try:
        # Rows are kind of tuple with an object, a kindness, a parent id and a rank.
        rows: list[Any] = [
            *session.exec(first_statement).all(),  # type: ignore
            *session.exec(second_statement).all(),  # type: ignore
        ]
        # According to https://sqlite.org/fts5.html#the_bm25_function,
        # "the better matches are assigned numerically lower scores."
        # Hence the ascending sort on the rank column (index 3).
        rows.sort(key=lambda row: row[3])
        start = offset if offset and offset > 0 else 0
        # A stop of None frames up to the end; a stop beyond the end of the list is fine too.
        stop = start + limit if limit and limit > 0 else None
        return [Item(id=row[0], kind=row[1], parent_id=row[2]) for row in rows[start:stop]]
    except OperationalError as e:
        raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
159
+
160
+
161
class MatchingTerm(BaseModel):
    """
    Place holder for a term that matches a value (term validation).
    """

    # Plain data holder: three required string identifiers and no behavior.
    project_id: str
    """The project id to which the term belongs."""
    collection_id: str
    """The collection id to which the term belongs."""
    term_id: str
    """The term id."""