esgvoc 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (87) hide show
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +95 -60
  3. esgvoc/api/data_descriptors/__init__.py +50 -28
  4. esgvoc/api/data_descriptors/activity.py +3 -3
  5. esgvoc/api/data_descriptors/area_label.py +16 -1
  6. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  7. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  8. esgvoc/api/data_descriptors/consortium.py +14 -13
  9. esgvoc/api/data_descriptors/contact.py +5 -0
  10. esgvoc/api/data_descriptors/conventions.py +6 -0
  11. esgvoc/api/data_descriptors/creation_date.py +5 -0
  12. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  13. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  14. esgvoc/api/data_descriptors/date.py +1 -1
  15. esgvoc/api/data_descriptors/directory_date.py +1 -1
  16. esgvoc/api/data_descriptors/experiment.py +13 -11
  17. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  18. esgvoc/api/data_descriptors/frequency.py +3 -3
  19. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  20. esgvoc/api/data_descriptors/grid_label.py +2 -2
  21. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  22. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  23. esgvoc/api/data_descriptors/institution.py +8 -5
  24. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  25. esgvoc/api/data_descriptors/license.py +3 -3
  26. esgvoc/api/data_descriptors/mip_era.py +1 -1
  27. esgvoc/api/data_descriptors/model_component.py +1 -1
  28. esgvoc/api/data_descriptors/obs_type.py +5 -0
  29. esgvoc/api/data_descriptors/organisation.py +1 -1
  30. esgvoc/api/data_descriptors/physic_index.py +1 -1
  31. esgvoc/api/data_descriptors/product.py +2 -2
  32. esgvoc/api/data_descriptors/publication_status.py +5 -0
  33. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  34. esgvoc/api/data_descriptors/realm.py +1 -1
  35. esgvoc/api/data_descriptors/region.py +5 -0
  36. esgvoc/api/data_descriptors/resolution.py +3 -3
  37. esgvoc/api/data_descriptors/source.py +9 -5
  38. esgvoc/api/data_descriptors/source_type.py +1 -1
  39. esgvoc/api/data_descriptors/table.py +3 -2
  40. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  41. esgvoc/api/data_descriptors/time_range.py +4 -3
  42. esgvoc/api/data_descriptors/title.py +5 -0
  43. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  44. esgvoc/api/data_descriptors/variable.py +25 -12
  45. esgvoc/api/data_descriptors/variant_label.py +3 -3
  46. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  47. esgvoc/api/project_specs.py +120 -4
  48. esgvoc/api/projects.py +733 -505
  49. esgvoc/api/py.typed +0 -0
  50. esgvoc/api/report.py +12 -8
  51. esgvoc/api/search.py +168 -98
  52. esgvoc/api/universe.py +368 -157
  53. esgvoc/apps/drs/constants.py +1 -1
  54. esgvoc/apps/drs/generator.py +51 -69
  55. esgvoc/apps/drs/report.py +60 -15
  56. esgvoc/apps/drs/validator.py +60 -71
  57. esgvoc/apps/jsg/cmip6_template.json +74 -0
  58. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  59. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  60. esgvoc/apps/py.typed +0 -0
  61. esgvoc/cli/config.py +500 -0
  62. esgvoc/cli/drs.py +3 -2
  63. esgvoc/cli/find.py +138 -0
  64. esgvoc/cli/get.py +46 -38
  65. esgvoc/cli/main.py +10 -3
  66. esgvoc/cli/status.py +27 -18
  67. esgvoc/cli/valid.py +10 -15
  68. esgvoc/core/constants.py +1 -1
  69. esgvoc/core/db/__init__.py +2 -4
  70. esgvoc/core/db/connection.py +5 -3
  71. esgvoc/core/db/models/project.py +57 -15
  72. esgvoc/core/db/models/universe.py +49 -10
  73. esgvoc/core/db/project_ingestion.py +79 -65
  74. esgvoc/core/db/universe_ingestion.py +71 -40
  75. esgvoc/core/exceptions.py +33 -0
  76. esgvoc/core/logging_handler.py +24 -2
  77. esgvoc/core/repo_fetcher.py +61 -59
  78. esgvoc/core/service/data_merger.py +47 -34
  79. esgvoc/core/service/state.py +107 -83
  80. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  81. esgvoc-1.0.0.dist-info/RECORD +95 -0
  82. esgvoc/api/_utils.py +0 -53
  83. esgvoc/core/logging.conf +0 -21
  84. esgvoc-0.3.0.dist-info/RECORD +0 -78
  85. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  86. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  87. {esgvoc-0.3.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/py.typed ADDED
File without changes
esgvoc/api/report.py CHANGED
@@ -30,11 +30,12 @@ class ValidationError(BaseModel, ABC):
30
30
  """JSON specification of the term."""
31
31
  term_kind: TermKind
32
32
  """The kind of term."""
33
- @computed_field # type: ignore
33
+ @computed_field # type: ignore
34
34
  @property
35
35
  def class_name(self) -> str:
36
- """The class name of the issue for JSON serialization."""
37
- return self.__class__.__name__
36
+ """The class name of the issue for JSON serialization."""
37
+ return self.__class__.__name__
38
+
38
39
  @abstractmethod
39
40
  def accept(self, visitor: ValidationErrorVisitor) -> Any:
40
41
  """
@@ -47,6 +48,7 @@ class ValidationError(BaseModel, ABC):
47
48
  """
48
49
  pass
49
50
 
51
+
50
52
  class UniverseTermError(ValidationError):
51
53
  """
52
54
  A validation error on a term from the universe.
@@ -60,9 +62,10 @@ class UniverseTermError(ValidationError):
60
62
 
61
63
  def __str__(self) -> str:
62
64
  term_id = self.term[api_settings.TERM_ID_JSON_KEY]
63
- result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} "+\
65
+ result = f"The term {term_id} from the data descriptor {self.data_descriptor_id} " + \
64
66
  f"does not validate the given value '{self.value}'"
65
67
  return result
68
+
66
69
  def __repr__(self) -> str:
67
70
  return self.__str__()
68
71
 
@@ -80,9 +83,10 @@ class ProjectTermError(ValidationError):
80
83
 
81
84
  def __str__(self) -> str:
82
85
  term_id = self.term[api_settings.TERM_ID_JSON_KEY]
83
- result = f"The term {term_id} from the collection {self.collection_id} "+\
86
+ result = f"The term {term_id} from the collection {self.collection_id} " + \
84
87
  f"does not validate the given value '{self.value}'"
85
88
  return result
89
+
86
90
  def __repr__(self) -> str:
87
91
  return self.__str__()
88
92
 
@@ -95,16 +99,16 @@ class ValidationReport(BaseModel):
95
99
  expression: str
96
100
  """The given expression."""
97
101
 
98
- errors: list[UniverseTermError|ProjectTermError]
102
+ errors: list[UniverseTermError | ProjectTermError]
99
103
  """The validation errors."""
100
104
 
101
- @computed_field # type: ignore
105
+ @computed_field # type: ignore
102
106
  @property
103
107
  def nb_errors(self) -> int:
104
108
  """The number of validation errors."""
105
109
  return len(self.errors) if self.errors else 0
106
110
 
107
- @computed_field # type: ignore
111
+ @computed_field # type: ignore
108
112
  @property
109
113
  def validated(self) -> bool:
110
114
  """The expression is validated or not."""
esgvoc/api/search.py CHANGED
@@ -1,8 +1,173 @@
1
- from typing import Iterable
2
1
  from enum import Enum
2
+ from typing import Any, Iterable, MutableSequence, Sequence
3
+
4
+ import sqlalchemy as sa
3
5
  from pydantic import BaseModel
4
- from sqlalchemy import ColumnElement, func
5
- from sqlmodel import col
6
+ from sqlalchemy import ColumnElement
7
+ from sqlalchemy.exc import OperationalError
8
+ from sqlalchemy.sql.expression import Select
9
+ from sqlalchemy.sql.selectable import ExecutableReturnsRows
10
+ from sqlmodel import Column, Field, Session, col
11
+
12
+ import esgvoc.core.constants as api_settings
13
+ import esgvoc.core.service as service
14
+ from esgvoc.api.data_descriptors import DATA_DESCRIPTOR_CLASS_MAPPING
15
+ from esgvoc.api.data_descriptors.data_descriptor import DataDescriptor, DataDescriptorSubSet
16
+ from esgvoc.core.db.models.project import PCollectionFTS5, PTerm, PTermFTS5
17
+ from esgvoc.core.db.models.universe import UDataDescriptorFTS5, UTerm, UTermFTS5
18
+ from esgvoc.core.exceptions import EsgvocDbError, EsgvocValueError
19
+
20
+
21
+ class ItemKind(Enum):
22
+ DATA_DESCRIPTOR = "data_descriptor"
23
+ """Corresponds to a data descriptor"""
24
+ COLLECTION = "collection"
25
+ """Corresponds to a collection"""
26
+ TERM = "term"
27
+ """Corresponds to a term"""
28
+
29
+
30
+ class Item(BaseModel):
31
+ """An item from the universe or a project (data descriptor, collection or term)."""
32
+ id: str
33
+ """The id of the item."""
34
+ kind: ItemKind = Field(sa_column=Column(sa.Enum(ItemKind)))
35
+ """The kind of the item."""
36
+ parent_id: str
37
+ """The id of the parent of the item."""
38
+
39
+
40
+ def get_pydantic_class(data_descriptor_id_or_term_type: str) -> type[DataDescriptor]:
41
+ if data_descriptor_id_or_term_type in DATA_DESCRIPTOR_CLASS_MAPPING:
42
+ return DATA_DESCRIPTOR_CLASS_MAPPING[data_descriptor_id_or_term_type]
43
+ else:
44
+ raise EsgvocDbError(f"'{data_descriptor_id_or_term_type}' pydantic class not found")
45
+
46
+
47
+ def get_universe_session() -> Session:
48
+
49
+ UNIVERSE_DB_CONNECTION = service.current_state.universe.db_connection
50
+ if UNIVERSE_DB_CONNECTION:
51
+ return UNIVERSE_DB_CONNECTION.create_session()
52
+ else:
53
+ raise EsgvocDbError('universe connection is not initialized')
54
+
55
+
56
+ def instantiate_pydantic_term(term: UTerm | PTerm,
57
+ selected_term_fields: Iterable[str] | None) -> DataDescriptor:
58
+ type = term.specs[api_settings.TERM_TYPE_JSON_KEY]
59
+ if selected_term_fields is not None:
60
+ subset = DataDescriptorSubSet(id=term.id, type=type)
61
+ for field in selected_term_fields:
62
+ setattr(subset, field, term.specs.get(field, None))
63
+ for field in DataDescriptorSubSet.MANDATORY_TERM_FIELDS:
64
+ setattr(subset, field, term.specs.get(field, None))
65
+ return subset
66
+ else:
67
+ term_class = get_pydantic_class(type)
68
+ return term_class(**term.specs)
69
+
70
+
71
+ def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
72
+ list_to_populate: MutableSequence[DataDescriptor],
73
+ selected_term_fields: Iterable[str] | None) -> None:
74
+ for db_term in db_terms:
75
+ term = instantiate_pydantic_term(db_term, selected_term_fields)
76
+ list_to_populate.append(term)
77
+
78
+
79
+ def process_expression(expression: str) -> str:
80
+ """
81
+ Allows only the SQLite FTS5 operators AND, OR and NOT, and performs a prefix search for single-word expressions.
82
+ """
83
+ # 1. Remove single and double quotes.
84
+ result = expression.replace('"', '')
85
+ result = result.replace("'", '')
86
+
87
+ # 2. Escape keywords.
88
+ result = result.replace('NEAR', '"NEAR"')
89
+ result = result.replace('+', '"+"')
90
+ result = result.replace('-', '"-"')
91
+ result = result.replace(':', '":"')
92
+ result = result.replace('^', '"^"')
93
+ result = result.replace('(', '"("')
94
+ result = result.replace(')', '")"')
95
+ result = result.replace(',', '","')
96
+
97
+ # 3. Make single word request a prefix search.
98
+ if not result.endswith('*'):
99
+ tokens = result.split(sep=None)
100
+ if len(tokens) == 1:
101
+ result += '*'
102
+ return result
103
+
104
+
105
+ def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
106
+ type[PTermFTS5] | type[PCollectionFTS5],
107
+ expression: str,
108
+ only_id: bool) -> ColumnElement[bool]:
109
+ processed_expression = process_expression(expression)
110
+ # TODO: fix this when specs are available in collections and data descriptors.
111
+ if cls is PTermFTS5 or cls is UTermFTS5:
112
+ if only_id:
113
+ result = col(cls.id).match(processed_expression)
114
+ else:
115
+ result = col(cls.specs).match(processed_expression) # type: ignore
116
+ else:
117
+ result = col(cls.id).match(processed_expression)
118
+ return result
119
+
120
+
121
+ def handle_rank_limit_offset(statement: Select, limit: int | None, offset: int | None) -> Select:
122
+ statement = statement.order_by(sa.text('rank'))
123
+ if limit and limit > 0: # False if == 0 and is None ; True if != 0 and is not None.
124
+ statement = statement.limit(limit)
125
+ if offset and offset > 0: # False if == 0 and is None ; True if != 0 and is not None.
126
+ statement = statement.offset(offset)
127
+ return statement
128
+
129
+
130
+ def execute_match_statement(expression: str, statement: ExecutableReturnsRows, session: Session) \
131
+ -> Sequence:
132
+ try:
133
+ raw_results = session.exec(statement) # type: ignore
134
+ # raw_results.all() returns a list of SQLAlchemy rows.
135
+ results = [result[0] for result in raw_results.all()]
136
+ return results
137
+ except OperationalError as e:
138
+ raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
139
+
140
+
141
+ def execute_find_item_statements(session: Session,
142
+ expression: str,
143
+ first_statement: Select,
144
+ second_statement: Select,
145
+ limit: int | None,
146
+ offset: int | None) -> list[Item]:
147
+ try:
148
+ # Each item found is a tuple-like row holding an id, a kind, a parent id and a rank.
149
+ first_statement_found = session.exec(first_statement).all() # type: ignore
150
+ second_statement_found = session.exec(second_statement).all() # type: ignore
151
+ tmp_result: list[Any] = list()
152
+ tmp_result.extend(first_statement_found)
153
+ tmp_result.extend(second_statement_found)
154
+ # According to https://sqlite.org/fts5.html#the_bm25_function,
155
+ # "the better matches are assigned numerically lower scores."
156
+ # Sort on the rank column (index 3).
157
+ sorted_tmp_result = sorted(tmp_result, key=lambda r: r[3], reverse=False)
158
+ if offset and offset > 0: # False if == 0 and is None ; True if != 0 and is not None.
159
+ start = offset
160
+ else:
161
+ start = 0
162
+ if limit and limit > 0: # False if == 0 and is None ; True if != 0 and is not None.
163
+ stop = start + limit
164
+ framed_tmp_result = sorted_tmp_result[start: stop] # is OK if stop > len of the list.
165
+ else:
166
+ framed_tmp_result = sorted_tmp_result[start:]
167
+ result = [Item(id=r[0], kind=r[1], parent_id=r[2]) for r in framed_tmp_result]
168
+ except OperationalError as e:
169
+ raise EsgvocValueError(f"unable to interpret expression '{expression}'") from e
170
+ return result
6
171
 
7
172
 
8
173
  class MatchingTerm(BaseModel):
@@ -15,98 +180,3 @@ class MatchingTerm(BaseModel):
15
180
  """The collection id to which the term belongs."""
16
181
  term_id: str
17
182
  """The term id."""
18
-
19
-
20
- class SearchType(Enum):
21
- """
22
- The search types used for to find terms.
23
- """
24
- EXACT = "exact"
25
- """Performs exact match."""
26
- LIKE = "like" # can interpret %
27
- """As SQL operator, it can interpret % as a wildcard."""
28
- STARTS_WITH = "starts_with" # can interpret %
29
- """Prefix based search."""
30
- ENDS_WITH = "ends_with" # can interpret %
31
- """Suffix based search."""
32
- REGEX = "regex"
33
- """Search based on regex."""
34
-
35
-
36
- class SearchSettings(BaseModel):
37
- """
38
- Search configuration.
39
- """
40
- type: SearchType = SearchType.EXACT
41
- """The type of search."""
42
- case_sensitive: bool = True
43
- """Enable case sensitivity or not."""
44
- not_operator: bool = False
45
- """Give the opposite result like the NOT SQL operator."""
46
- selected_term_fields: Iterable[str]|None = None
47
- """Term fields to select"""
48
-
49
-
50
- def _create_str_comparison_expression(field: str,
51
- value: str,
52
- settings: SearchSettings|None) -> ColumnElement:
53
- '''
54
- SQLite LIKE is case insensitive (and so STARTS/ENDS_WITH which are implemented with LIKE).
55
- So the case sensitive LIKE is implemented with REGEX.
56
- The i versions of SQLAlchemy operators (icontains, etc.) are not useful
57
- (but other dbs than SQLite should use them).
58
- If the provided `settings` is None, this functions returns an exact search expression.
59
- '''
60
- does_wild_cards_in_value_have_to_be_interpreted = False
61
- # Shortcut.
62
- if settings is None:
63
- return col(field).is_(other=value)
64
- else:
65
- match settings.type:
66
- # Early return because not operator is not implement with tilde symbol.
67
- case SearchType.EXACT:
68
- if settings.case_sensitive:
69
- if settings.not_operator:
70
- return col(field).is_not(other=value)
71
- else:
72
- return col(field).is_(other=value)
73
- else:
74
- if settings.not_operator:
75
- return func.lower(field) != func.lower(value)
76
- else:
77
- return func.lower(field) == func.lower(value)
78
- case SearchType.LIKE:
79
- if settings.case_sensitive:
80
- result = col(field).regexp_match(pattern=f".*{value}.*")
81
- else:
82
- result = col(field).contains(
83
- other=value,
84
- autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
85
- )
86
- case SearchType.STARTS_WITH:
87
- if settings.case_sensitive:
88
- result = col(field).regexp_match(pattern=f"^{value}.*")
89
- else:
90
- result = col(field).startswith(
91
- other=value,
92
- autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
93
- )
94
- case SearchType.ENDS_WITH:
95
- if settings.case_sensitive:
96
- result = col(field).regexp_match(pattern=f"{value}$")
97
- else:
98
- result = col(field).endswith(
99
- other=value,
100
- autoescape=not does_wild_cards_in_value_have_to_be_interpreted,
101
- )
102
- case SearchType.REGEX:
103
- if settings.case_sensitive:
104
- result = col(field).regexp_match(pattern=value)
105
- else:
106
- raise NotImplementedError(
107
- "regex string comparison case insensitive is not implemented"
108
- )
109
- if settings.not_operator:
110
- return ~result
111
- else:
112
- return result