esgvoc 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (79) hide show
  1. esgvoc/__init__.py +3 -1
  2. esgvoc/api/__init__.py +96 -72
  3. esgvoc/api/data_descriptors/__init__.py +18 -12
  4. esgvoc/api/data_descriptors/activity.py +8 -45
  5. esgvoc/api/data_descriptors/area_label.py +6 -0
  6. esgvoc/api/data_descriptors/branded_suffix.py +5 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +5 -0
  8. esgvoc/api/data_descriptors/consortium.py +16 -56
  9. esgvoc/api/data_descriptors/data_descriptor.py +106 -0
  10. esgvoc/api/data_descriptors/date.py +3 -46
  11. esgvoc/api/data_descriptors/directory_date.py +3 -46
  12. esgvoc/api/data_descriptors/experiment.py +19 -54
  13. esgvoc/api/data_descriptors/forcing_index.py +3 -45
  14. esgvoc/api/data_descriptors/frequency.py +6 -43
  15. esgvoc/api/data_descriptors/grid_label.py +6 -44
  16. esgvoc/api/data_descriptors/horizontal_label.py +6 -0
  17. esgvoc/api/data_descriptors/initialisation_index.py +3 -44
  18. esgvoc/api/data_descriptors/institution.py +11 -54
  19. esgvoc/api/data_descriptors/license.py +4 -44
  20. esgvoc/api/data_descriptors/mip_era.py +6 -44
  21. esgvoc/api/data_descriptors/model_component.py +7 -45
  22. esgvoc/api/data_descriptors/organisation.py +3 -40
  23. esgvoc/api/data_descriptors/physic_index.py +3 -45
  24. esgvoc/api/data_descriptors/product.py +4 -43
  25. esgvoc/api/data_descriptors/realisation_index.py +3 -44
  26. esgvoc/api/data_descriptors/realm.py +4 -42
  27. esgvoc/api/data_descriptors/resolution.py +6 -44
  28. esgvoc/api/data_descriptors/source.py +18 -53
  29. esgvoc/api/data_descriptors/source_type.py +3 -41
  30. esgvoc/api/data_descriptors/sub_experiment.py +3 -41
  31. esgvoc/api/data_descriptors/table.py +6 -48
  32. esgvoc/api/data_descriptors/temporal_label.py +6 -0
  33. esgvoc/api/data_descriptors/time_range.py +3 -27
  34. esgvoc/api/data_descriptors/variable.py +13 -71
  35. esgvoc/api/data_descriptors/variant_label.py +3 -47
  36. esgvoc/api/data_descriptors/vertical_label.py +5 -0
  37. esgvoc/api/project_specs.py +3 -2
  38. esgvoc/api/projects.py +727 -446
  39. esgvoc/api/py.typed +0 -0
  40. esgvoc/api/report.py +29 -16
  41. esgvoc/api/search.py +140 -95
  42. esgvoc/api/universe.py +362 -156
  43. esgvoc/apps/__init__.py +3 -4
  44. esgvoc/apps/drs/constants.py +1 -1
  45. esgvoc/apps/drs/generator.py +185 -198
  46. esgvoc/apps/drs/report.py +272 -136
  47. esgvoc/apps/drs/validator.py +132 -145
  48. esgvoc/apps/py.typed +0 -0
  49. esgvoc/cli/drs.py +32 -21
  50. esgvoc/cli/get.py +35 -31
  51. esgvoc/cli/install.py +11 -8
  52. esgvoc/cli/main.py +0 -2
  53. esgvoc/cli/status.py +5 -5
  54. esgvoc/cli/valid.py +40 -40
  55. esgvoc/core/constants.py +1 -1
  56. esgvoc/core/db/__init__.py +2 -4
  57. esgvoc/core/db/connection.py +5 -3
  58. esgvoc/core/db/models/project.py +50 -8
  59. esgvoc/core/db/models/universe.py +51 -12
  60. esgvoc/core/db/project_ingestion.py +60 -46
  61. esgvoc/core/db/universe_ingestion.py +58 -29
  62. esgvoc/core/exceptions.py +33 -0
  63. esgvoc/core/logging_handler.py +1 -1
  64. esgvoc/core/repo_fetcher.py +4 -3
  65. esgvoc/core/service/__init__.py +37 -5
  66. esgvoc/core/service/configuration/config_manager.py +188 -0
  67. esgvoc/core/service/configuration/setting.py +88 -0
  68. esgvoc/core/service/state.py +49 -32
  69. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/METADATA +34 -3
  70. esgvoc-0.4.0.dist-info/RECORD +80 -0
  71. esgvoc/api/_utils.py +0 -39
  72. esgvoc/cli/config.py +0 -82
  73. esgvoc/core/service/settings.py +0 -73
  74. esgvoc/core/service/settings.toml +0 -17
  75. esgvoc/core/service/settings_default.toml +0 -17
  76. esgvoc-0.2.1.dist-info/RECORD +0 -73
  77. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/WHEEL +0 -0
  78. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/entry_points.txt +0 -0
  79. {esgvoc-0.2.1.dist-info → esgvoc-0.4.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/py.typed ADDED
File without changes
esgvoc/api/report.py CHANGED
@@ -1,7 +1,8 @@
1
- from pydantic import BaseModel, computed_field
2
1
  from abc import ABC, abstractmethod
3
2
  from typing import Any, Protocol
4
3
 
4
+ from pydantic import BaseModel, computed_field
5
+
5
6
  import esgvoc.core.constants as api_settings
6
7
  from esgvoc.core.db.models.mixins import TermKind
7
8
 
@@ -29,7 +30,12 @@ class ValidationError(BaseModel, ABC):
29
30
  """JSON specification of the term."""
30
31
  term_kind: TermKind
31
32
  """The kind of term."""
32
-
33
+ @computed_field # type: ignore
34
+ @property
35
+ def class_name(self) -> str:
36
+ """The class name of the issue for JSON serialization."""
37
+ return self.__class__.__name__
38
+
33
39
  @abstractmethod
34
40
  def accept(self, visitor: ValidationErrorVisitor) -> Any:
35
41
  """
@@ -42,22 +48,24 @@ class ValidationError(BaseModel, ABC):
42
48
  """
43
49
  pass
44
50
 
51
+
45
52
  class UniverseTermError(ValidationError):
46
53
  """
47
54
  A validation error on a term from the universe.
48
55
  """
49
-
56
+
50
57
  data_descriptor_id: str
51
58
  """The data descriptor that the term belongs."""
52
59
 
53
60
  def accept(self, visitor: ValidationErrorVisitor) -> Any:
54
61
  return visitor.visit_universe_term_error(self)
55
-
62
+
56
63
  def __str__(self) -> str:
57
64
  term_id = self.term[api_settings.TERM_ID_JSON_KEY]
58
- result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} "+\
65
+ result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} " + \
59
66
  f"does not validate the given value '{self.value}'"
60
67
  return result
68
+
61
69
  def __repr__(self) -> str:
62
70
  return self.__str__()
63
71
 
@@ -66,18 +74,19 @@ class ProjectTermError(ValidationError):
66
74
  """
67
75
  A validation error on a term from a project.
68
76
  """
69
-
77
+
70
78
  collection_id: str
71
79
  """The collection id that the term belongs"""
72
80
 
73
81
  def accept(self, visitor: ValidationErrorVisitor) -> Any:
74
82
  return visitor.visit_project_term_error(self)
75
-
83
+
76
84
  def __str__(self) -> str:
77
85
  term_id = self.term[api_settings.TERM_ID_JSON_KEY]
78
- result = f"The term {term_id} from the collection {self.collection_id} "+\
86
+ result = f"The term {term_id} from the collection {self.collection_id} " + \
79
87
  f"does not validate the given value '{self.value}'"
80
88
  return result
89
+
81
90
  def __repr__(self) -> str:
82
91
  return self.__str__()
83
92
 
@@ -86,29 +95,33 @@ class ValidationReport(BaseModel):
86
95
  """
87
96
  Term validation report.
88
97
  """
98
+
89
99
  expression: str
90
100
  """The given expression."""
91
- errors: list[ValidationError]
101
+
102
+ errors: list[UniverseTermError | ProjectTermError]
92
103
  """The validation errors."""
93
- @computed_field # type: ignore
104
+
105
+ @computed_field # type: ignore
94
106
  @property
95
107
  def nb_errors(self) -> int:
96
108
  """The number of validation errors."""
97
109
  return len(self.errors) if self.errors else 0
98
- @computed_field # type: ignore
110
+
111
+ @computed_field # type: ignore
99
112
  @property
100
113
  def validated(self) -> bool:
101
114
  """The expression is validated or not."""
102
115
  return False if self.errors else True
103
-
104
-
116
+
105
117
  def __len__(self) -> int:
106
118
  return self.nb_errors
107
-
119
+
108
120
  def __bool__(self) -> bool:
109
121
  return self.validated
110
-
122
+
111
123
  def __str__(self) -> str:
112
124
  return f"'{self.expression}' has {self.nb_errors} error(s)"
125
+
113
126
  def __repr__(self) -> str:
114
- return self.__str__()
127
+ return self.__str__()
esgvoc/api/search.py CHANGED
@@ -1,8 +1,146 @@
1
1
  from enum import Enum
2
+ from typing import Any, Iterable, MutableSequence, Sequence
2
3
 
4
+ import sqlalchemy as sa
3
5
  from pydantic import BaseModel
4
- from sqlalchemy import ColumnElement, func
5
- from sqlmodel import col
6
+ from sqlalchemy import ColumnElement
7
+ from sqlalchemy.exc import OperationalError
8
+ from sqlalchemy.sql.expression import Select
9
+ from sqlalchemy.sql.selectable import ExecutableReturnsRows
10
+ from sqlmodel import Column, Field, Session, col
11
+
12
+ import esgvoc.core.constants as api_settings
13
+ import esgvoc.core.service as service
14
+ from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING
15
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor, DataDescriptorSubSet
16
+ from esgvoc.core.db.models.project import PCollectionFTS5, PTerm, PTermFTS5
17
+ from esgvoc.core.db.models.universe import UDataDescriptorFTS5, UTerm, UTermFTS5
18
+ from esgvoc.core.exceptions import EsgvocDbError, EsgvocValueError
19
+
20
+
21
+ class ItemKind(Enum):
22
+ DATA_DESCRIPTOR = "data_descriptor"
23
+ """Corresponds to a data descriptor"""
24
+ COLLECTION = "collection"
25
+ """Corresponds to a collection"""
26
+ TERM = "term"
27
+ """Corresponds to a term"""
28
+
29
+
30
+ class Item(BaseModel):
31
+ """An item from the universe or a project (data descriptor, collection or term)."""
32
+ id: str
33
+ """The id of the item."""
34
+ kind: ItemKind = Field(sa_column=Column(sa.Enum(ItemKind)))
35
+ """The kind of the item."""
36
+ parent_id: str
37
+ """The id of the parent of the item."""
38
+
39
+
40
+ def get_pydantic_class(data_descriptor_id_or_term_type: str) -> type[DataDescriptor]:
41
+ if data_descriptor_id_or_term_type in DATA_DESCRIPTOR_CLASS_MAPPING:
42
+ return DATA_DESCRIPTOR_CLASS_MAPPING[data_descriptor_id_or_term_type]
43
+ else:
44
+ raise EsgvocDbError(f"'{data_descriptor_id_or_term_type}' pydantic class not found")
45
+
46
+
47
+ def get_universe_session() -> Session:
48
+
49
+ UNIVERSE_DB_CONNECTION = service.current_state.universe.db_connection
50
+ if UNIVERSE_DB_CONNECTION:
51
+ return UNIVERSE_DB_CONNECTION.create_session()
52
+ else:
53
+ raise EsgvocDbError('universe connection is not initialized')
54
+
55
+
56
+ def instantiate_pydantic_term(term: UTerm | PTerm,
57
+ selected_term_fields: Iterable[str] | None) -> DataDescriptor:
58
+ type = term.specs[api_settings.TERM_TYPE_JSON_KEY]
59
+ if selected_term_fields is not None:
60
+ subset = DataDescriptorSubSet(id=term.id, type=type)
61
+ for field in selected_term_fields:
62
+ setattr(subset, field, term.specs.get(field, None))
63
+ for field in DataDescriptorSubSet.MANDATORY_TERM_FIELDS:
64
+ setattr(subset, field, term.specs.get(field, None))
65
+ return subset
66
+ else:
67
+ term_class = get_pydantic_class(type)
68
+ return term_class(**term.specs)
69
+
70
+
71
+ def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
72
+ list_to_populate: MutableSequence[DataDescriptor],
73
+ selected_term_fields: Iterable[str] | None) -> None:
74
+ for db_term in db_terms:
75
+ term = instantiate_pydantic_term(db_term, selected_term_fields)
76
+ list_to_populate.append(term)
77
+
78
+
79
+ def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
80
+ type[PTermFTS5] | type[PCollectionFTS5],
81
+ expression: str,
82
+ only_id: bool) -> ColumnElement[bool]:
83
+ # TODO: fix this when specs will be available in collections and Data descriptors.
84
+ if cls is PTermFTS5 or cls is UTermFTS5:
85
+ if only_id:
86
+ result = col(cls.id).match(expression)
87
+ else:
88
+ result = col(cls.specs).match(expression) # type: ignore
89
+ else:
90
+ result = col(cls.id).match(expression)
91
+ return result
92
+
93
+
94
+ def handle_rank_limit_offset(statement: Select, limit: int | None, offset: int | None) -> Select:
95
+ statement = statement.order_by(sa.text('rank'))
96
+ if limit and limit > 0: # False if == 0 and is None ; True if != 0 and is not None.
97
+ statement = statement.limit(limit)
98
+ if offset and offset > 0: # False if == 0 and is None ; True if != 0 and is not None.
99
+ statement = statement.offset(offset)
100
+ return statement
101
+
102
+
103
+ def execute_match_statement(expression: str, statement: ExecutableReturnsRows, session: Session) \
104
+ -> Sequence:
105
+ try:
106
+ raw_results = session.exec(statement) # type: ignore
107
+ # raw_results.all() returns a list of SQLAlchemy rows.
108
+ results = [result[0] for result in raw_results.all()]
109
+ return results
110
+ except OperationalError as e:
111
+ raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
112
+
113
+
114
+ def execute_find_item_statements(session: Session,
115
+ expression: str,
116
+ first_statement: Select,
117
+ second_statement: Select,
118
+ limit: int | None,
119
+ offset: int | None) -> list[Item]:
120
+ try:
121
+ # Each item found is a tuple-like row holding an id, a kind, a parent id and a rank.
122
+ first_statement_found = session.exec(first_statement).all() # type: ignore
123
+ second_statement_found = session.exec(second_statement).all() # type: ignore
124
+ tmp_result: list[Any] = list()
125
+ tmp_result.extend(first_statement_found)
126
+ tmp_result.extend(second_statement_found)
127
+ # According to https://sqlite.org/fts5.html#the_bm25_function,
128
+ # "the better matches are assigned numerically lower scores."
129
+ # Sort on the rank column (index 3).
130
+ sorted_tmp_result = sorted(tmp_result, key=lambda r: r[3], reverse=False)
131
+ if offset and offset > 0: # False if == 0 and is None ; True if != 0 and is not None.
132
+ start = offset
133
+ else:
134
+ start = 0
135
+ if limit and limit > 0: # False if == 0 and is None ; True if != 0 and is not None.
136
+ stop = start + limit
137
+ framed_tmp_result = sorted_tmp_result[start: stop] # is OK if stop > len of the list.
138
+ else:
139
+ framed_tmp_result = sorted_tmp_result[start:]
140
+ result = [Item(id=r[0], kind=r[1], parent_id=r[2]) for r in framed_tmp_result]
141
+ except OperationalError as e:
142
+ raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
143
+ return result
6
144
 
7
145
 
8
146
  class MatchingTerm(BaseModel):
@@ -15,96 +153,3 @@ class MatchingTerm(BaseModel):
15
153
  """The collection id to which the term belongs."""
16
154
  term_id: str
17
155
  """The term id."""
18
-
19
-
20
- class SearchType(Enum):
21
- """
22
- The search types used for to find terms.
23
- """
24
- EXACT = "exact"
25
- """Performs exact match."""
26
- LIKE = "like" # can interpret %
27
- """As SQL operator, it can interpret % as a wildcard."""
28
- STARTS_WITH = "starts_with" # can interpret %
29
- """Prefix based search."""
30
- ENDS_WITH = "ends_with" # can interpret %
31
- """Suffix based search."""
32
- REGEX = "regex"
33
- """Search based on regex."""
34
-
35
-
36
- class SearchSettings(BaseModel):
37
- """
38
- Search configuration.
39
- """
40
- type: SearchType = SearchType.EXACT
41
- """The type of search."""
42
- case_sensitive: bool = True
43
- """Enable case sensitivity or not."""
44
- not_operator: bool = False
45
- """Give the opposite result like the NOT SQL operator."""
46
-
47
-
48
- def _create_str_comparison_expression(field: str,
49
- value: str,
50
- settings: SearchSettings|None) -> ColumnElement:
51
- '''
52
- SQLite LIKE is case insensitive (and so STARTS/ENDS_WITH which are implemented with LIKE).
53
- So the case sensitive LIKE is implemented with REGEX.
54
- The i versions of SQLAlchemy operators (icontains, etc.) are not useful
55
- (but other dbs than SQLite should use them).
56
- If the provided `settings` is None, this functions returns an exact search expression.
57
- '''
58
- does_wild_cards_in_value_have_to_be_interpreted = False
59
- # Shortcut.
60
- if settings is None:
61
- return col(field).is_(other=value)
62
- else:
63
- match settings.type:
64
- # Early return because not operator is not implement with tilde symbol.
65
- case SearchType.EXACT:
66
- if settings.case_sensitive:
67
- if settings.not_operator:
68
- return col(field).is_not(other=value)
69
- else:
70
- return col(field).is_(other=value)
71
- else:
72
- if settings.not_operator:
73
- return func.lower(field) != func.lower(value)
74
- else:
75
- return func.lower(field) == func.lower(value)
76
- case SearchType.LIKE:
77
- if settings.case_sensitive:
78
- result = col(field).regexp_match(pattern=f".*{value}.*")
79
- else:
80
- result = col(field).contains(
81
- other=value,
82
- autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
83
- )
84
- case SearchType.STARTS_WITH:
85
- if settings.case_sensitive:
86
- result = col(field).regexp_match(pattern=f"^{value}.*")
87
- else:
88
- result = col(field).startswith(
89
- other=value,
90
- autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
91
- )
92
- case SearchType.ENDS_WITH:
93
- if settings.case_sensitive:
94
- result = col(field).regexp_match(pattern=f"{value}$")
95
- else:
96
- result = col(field).endswith(
97
- other=value,
98
- autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
99
- )
100
- case SearchType.REGEX:
101
- if settings.case_sensitive:
102
- result = col(field).regexp_match(pattern=value)
103
- else:
104
- raise NotImplementedError(
105
- "regex string comparison case insensitive is not implemented"
106
- )
107
- if settings.not_operator:
108
- return ~result
109
- else:
110
- return result