esgvoc 0.4.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of esgvoc might be problematic.

Files changed (74)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +52 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/member_id.py +9 -0
  26. esgvoc/api/data_descriptors/mip_era.py +1 -1
  27. esgvoc/api/data_descriptors/model_component.py +1 -1
  28. esgvoc/api/data_descriptors/obs_type.py +5 -0
  29. esgvoc/api/data_descriptors/organisation.py +1 -1
  30. esgvoc/api/data_descriptors/physic_index.py +1 -1
  31. esgvoc/api/data_descriptors/product.py +2 -2
  32. esgvoc/api/data_descriptors/publication_status.py +5 -0
  33. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  34. esgvoc/api/data_descriptors/realm.py +1 -1
  35. esgvoc/api/data_descriptors/region.py +5 -0
  36. esgvoc/api/data_descriptors/resolution.py +3 -3
  37. esgvoc/api/data_descriptors/source.py +9 -5
  38. esgvoc/api/data_descriptors/source_type.py +1 -1
  39. esgvoc/api/data_descriptors/table.py +3 -2
  40. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  41. esgvoc/api/data_descriptors/time_range.py +4 -3
  42. esgvoc/api/data_descriptors/title.py +5 -0
  43. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  44. esgvoc/api/data_descriptors/variable.py +25 -12
  45. esgvoc/api/data_descriptors/variant_label.py +3 -3
  46. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  47. esgvoc/api/project_specs.py +117 -2
  48. esgvoc/api/projects.py +328 -287
  49. esgvoc/api/search.py +30 -3
  50. esgvoc/api/universe.py +42 -27
  51. esgvoc/apps/drs/generator.py +87 -74
  52. esgvoc/apps/jsg/cmip6_template.json +74 -0
  53. esgvoc/apps/jsg/json_schema_generator.py +194 -0
  54. esgvoc/cli/config.py +500 -0
  55. esgvoc/cli/find.py +138 -0
  56. esgvoc/cli/get.py +43 -38
  57. esgvoc/cli/main.py +10 -3
  58. esgvoc/cli/status.py +27 -18
  59. esgvoc/cli/valid.py +10 -15
  60. esgvoc/core/db/models/project.py +11 -11
  61. esgvoc/core/db/models/universe.py +3 -3
  62. esgvoc/core/db/project_ingestion.py +40 -40
  63. esgvoc/core/db/universe_ingestion.py +36 -33
  64. esgvoc/core/logging_handler.py +24 -2
  65. esgvoc/core/repo_fetcher.py +61 -59
  66. esgvoc/core/service/data_merger.py +47 -34
  67. esgvoc/core/service/state.py +107 -83
  68. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/METADATA +5 -20
  69. esgvoc-1.0.1.dist-info/RECORD +95 -0
  70. esgvoc/core/logging.conf +0 -21
  71. esgvoc-0.4.0.dist-info/RECORD +0 -80
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/WHEEL +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/entry_points.txt +0 -0
  74. {esgvoc-0.4.0.dist-info → esgvoc-1.0.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/search.py CHANGED
@@ -76,18 +76,45 @@ def instantiate_pydantic_terms(db_terms: Iterable[UTerm | PTerm],
         list_to_populate.append(term)
 
 
+def process_expression(expression: str) -> str:
+    """
+    Allow only the SQLite FTS operators AND, OR and NOT, and perform a prefix search for single-word expressions.
+    """
+    # 1. Remove single and double quotes.
+    result = expression.replace('"', '')
+    result = result.replace("'", '')
+
+    # 2. Escape keywords.
+    result = result.replace('NEAR', '"NEAR"')
+    result = result.replace('+', '"+"')
+    result = result.replace('-', '"-"')
+    result = result.replace(':', '":"')
+    result = result.replace('^', '"^"')
+    result = result.replace('(', '"("')
+    result = result.replace(')', '")"')
+    result = result.replace(',', '","')
+
+    # 3. Make a single-word request a prefix search.
+    if not result.endswith('*'):
+        tokens = result.split(sep=None)
+        if len(tokens) == 1:
+            result += '*'
+    return result
+
+
 def generate_matching_condition(cls: type[UTermFTS5] | type[UDataDescriptorFTS5] |
                                 type[PTermFTS5] | type[PCollectionFTS5],
                                 expression: str,
                                 only_id: bool) -> ColumnElement[bool]:
+    processed_expression = process_expression(expression)
     # TODO: fix this when specs will be available in collections and data descriptors.
     if cls is PTermFTS5 or cls is UTermFTS5:
         if only_id:
-            result = col(cls.id).match(expression)
+            result = col(cls.id).match(processed_expression)
         else:
-            result = col(cls.specs).match(expression)  # type: ignore
+            result = col(cls.specs).match(processed_expression)  # type: ignore
     else:
-        result = col(cls.id).match(expression)
+        result = col(cls.id).match(processed_expression)
     return result
 
 
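For reference, here is how the new process_expression sanitizer behaves on a few representative inputs. This is a minimal standalone sketch that condenses the function added above without changing its behaviour; the example inputs are illustrative only, not taken from the esgvoc test suite.

def process_expression(expression: str) -> str:
    # Condensed copy of the sanitizer added in this release.
    result = expression.replace('"', '').replace("'", '')
    # Neutralize FTS5 operators other than AND, OR and NOT by quoting them.
    for token in ('NEAR', '+', '-', ':', '^', '(', ')', ','):
        result = result.replace(token, f'"{token}"')
    # A single-keyword expression becomes a prefix search.
    if not result.endswith('*') and len(result.split()) == 1:
        result += '*'
    return result

print(process_expression('ocean'))              # ocean*  (single keyword -> prefix search)
print(process_expression('ocean temperature'))  # ocean temperature  (implicit AND, unchanged)
print(process_expression('near-surface'))       # near"-"surface*  (the '-' operator is quoted away)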
esgvoc/api/universe.py CHANGED
@@ -13,6 +13,7 @@ from esgvoc.api.search import (
     handle_rank_limit_offset,
     instantiate_pydantic_term,
     instantiate_pydantic_terms,
+    process_expression,
 )
 from esgvoc.core.db.models.universe import UDataDescriptor, UDataDescriptorFTS5, UTerm, UTermFTS5
 
@@ -211,12 +212,15 @@ def find_data_descriptors_in_universe(expression: str,
                                       offset: int | None = None) -> list[tuple[str, dict]]:
     """
     Find data descriptors in the universe based on a full text search defined by the given `expression`.
-    The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of data descriptor ids and contexts, sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     If the provided `expression` does not hit any data descriptor, the function returns an empty list.
@@ -266,12 +270,15 @@ def find_terms_in_universe(expression: str,
                            selected_term_fields: Iterable[str] | None = None) -> list[DataDescriptor]:
     """
     Find terms in the universe based on a full-text search defined by the given `expression`.
-    The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of term instances sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     If the provided `expression` does not hit any term, the function returns an empty list.
@@ -323,12 +330,15 @@ def find_terms_in_data_descriptor(expression: str, data_descriptor_id: str,
                                   -> list[DataDescriptor]:
     """
     Find terms in the given data descriptor based on a full-text search defined by the given `expression`.
-    The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of term instances sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     This function performs an exact match on the `data_descriptor_id`,
@@ -370,12 +380,16 @@ def find_items_in_universe(expression: str,
                            offset: int | None = None) -> list[Item]:
     """
     Find items, at the moment terms and data descriptors, in the universe based on a full-text
-    search defined by the given `expression`. The `expression` comes from the powerful
-    `SQLite FTS extension <https://sqlite.org/fts5.html#full_text_query_syntax>`_
-    and corresponds to the expression of the `MATCH` operator.
-    It can be composed of one or multiple keywords combined with boolean
-    operators (`NOT`, `AND`, `^`, etc. default is `OR`). Keywords can define prefixes or postfixes
-    with the wildcard `*`.
+    search defined by the given `expression`.
+    The `expression` can be composed of one or multiple keywords.
+    The keywords can be combined with the boolean operators `AND`,
+    `OR` and `NOT` (case sensitive). Keywords are separated by whitespace;
+    if no boolean operator is provided, whitespace is handled as an
+    implicit AND operator between each pair of keywords. Note that this
+    function does not provide any priority operator (parentheses).
+    Keywords can define prefixes by adding a `*` at their end.
+    If the expression is composed of only one keyword, the function
+    automatically treats it as a prefix.
     The function returns a list of item instances sorted according to the
     bm25 ranking metric (list index `0` has the highest rank).
     If the provided `expression` does not hit any item, the function returns an empty list.
@@ -401,23 +415,24 @@ def find_items_in_universe(expression: str,
     # TODO: execute union query when it will be possible to compute parent of terms and data descriptors.
     result = list()
     with get_universe_session() as session:
+        processed_expression = process_expression(expression)
         if only_id:
             dd_column = col(UDataDescriptorFTS5.id)
             term_column = col(UTermFTS5.id)
         else:
             dd_column = col(UDataDescriptorFTS5.id)  # TODO: use specs when implemented!
             term_column = col(UTermFTS5.specs)  # type: ignore
-        dd_where_condition = dd_column.match(expression)
+        dd_where_condition = dd_column.match(processed_expression)
         dd_statement = select(UDataDescriptorFTS5.id,
                               text("'data_descriptor' AS TYPE"),
                               text("'universe' AS TYPE"),
                               text('rank')).where(dd_where_condition)
-        term_where_condition = term_column.match(expression)
+        term_where_condition = term_column.match(processed_expression)
         term_statement = select(UTermFTS5.id,
                                 text("'term' AS TYPE"),
                                 UDataDescriptor.id,
                                 text('rank')).join(UDataDescriptor) \
                          .where(term_where_condition)
-        result = execute_find_item_statements(session, expression, dd_statement,
+        result = execute_find_item_statements(session, processed_expression, dd_statement,
                                               term_statement, limit, offset)
     return result
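To make the documented semantics concrete, a few hypothetical calls are sketched below. The function and its `expression` parameter come from the diff above; the keyword values are invented for illustration.

from esgvoc.api.universe import find_terms_in_universe

# One keyword: automatically treated as a prefix ('temp' also hits 'temperature').
terms = find_terms_in_universe('temp')

# Several keywords separated by whitespace: implicit AND between each pair.
terms = find_terms_in_universe('air temperature')

# Explicit boolean operators, case sensitive; no parentheses are supported.
terms = find_terms_in_universe('ocean OR atmosphere NOT ice')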
esgvoc/apps/drs/generator.py CHANGED
@@ -2,6 +2,7 @@ from typing import Any, Iterable, Mapping, cast
 
 import esgvoc.api.projects as projects
 from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
+from esgvoc.api.search import MatchingTerm
 from esgvoc.apps.drs.report import (
     AssignedTerm,
     ConflictingCollections,
@@ -92,8 +93,7 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_mapping(mapping, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-            self._get_full_file_name_extension()  # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report
 
     def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
@@ -108,12 +108,10 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-            self._get_full_file_name_extension()  # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report
 
-    def generate_from_mapping(self, mapping: Mapping[str, str],
-                              drs_type: DrsType | str) -> DrsGenerationReport:
+    def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
         """
         Generate a DRS expression from a mapping of collection ids and terms.
 
@@ -134,8 +132,7 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
 
-    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) \
-            -> DrsGenerationReport:  # noqa E127
+    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport:  # noqa E127
         """
         Generate a DRS expression from an unordered bag of terms.
 
@@ -156,23 +153,24 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")
 
-    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
-            -> DrsGenerationReport:  # noqa E127
+    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
-                                   given_mapping_or_bag_of_terms=mapping,
-                                   mapping_used=mapping,
-                                   generated_drs_expression=drs_expression,
-                                   errors=cast(list[GenerationError], errors),
-                                   warnings=cast(list[GenerationWarning], warnings))
-
-    def __generate_from_mapping(self, mapping: Mapping[str, str],
-                                specs: DrsSpecification,
-                                has_to_valid_terms: bool) \
-            -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
+        return DrsGenerationReport(
+            project_id=self.project_id,
+            type=specs.type,
+            given_mapping_or_bag_of_terms=mapping,
+            mapping_used=mapping,
+            generated_drs_expression=drs_expression,
+            errors=cast(list[GenerationError], errors),
+            warnings=cast(list[GenerationWarning], warnings),
+        )
+
+    def __generate_from_mapping(
+        self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
+    ) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         warnings: list[GenerationIssue] = list()
         drs_expression = ""
@@ -185,18 +183,17 @@ class DrsGenerator(DrsApplication):
             if collection_id in mapping:
                 part_value = mapping[collection_id]
                 if has_to_valid_terms:
-                    matching_terms = projects.valid_term_in_collection(part_value,
-                                                                       self.project_id,
-                                                                       collection_id)
+                    matching_terms = projects.valid_term_in_collection(part_value, self.project_id, collection_id)
                     if not matching_terms:
-                        issue = InvalidTerm(term=part_value,
-                                            term_position=part_position,
-                                            collection_id_or_constant_value=collection_id)
+                        issue = InvalidTerm(
+                            term=part_value,
+                            term_position=part_position,
+                            collection_id_or_constant_value=collection_id,
+                        )
                         errors.append(issue)
                         part_value = DrsGenerationReport.INVALID_TAG
             else:
-                other_issue = MissingTerm(collection_id=collection_id,
-                                          collection_position=part_position)
+                other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
                 if collection_part.is_required:
                     errors.append(other_issue)
                     part_value = DrsGenerationReport.MISSING_TAG
@@ -209,14 +206,18 @@ class DrsGenerator(DrsApplication):
 
             drs_expression += part_value + specs.separator
 
-        drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
+        drs_expression = drs_expression[0 : len(drs_expression) - len(specs.separator)]
         return drs_expression, errors, warnings
 
-    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
-            -> DrsGenerationReport:  # noqa E127
+    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         collection_terms_mapping: dict[str, set[str]] = dict()
         for term in terms:
-            matching_terms = projects.valid_term_in_project(term, self.project_id)
+            matching_terms: list[MatchingTerm] = []
+            for col in [part.collection_id for part in specs.parts if part.kind == DrsPartKind.COLLECTION]:
+                matching_terms_in_col = projects.valid_term_in_collection(term, self.project_id, col)
+                for mtic in matching_terms_in_col:
+                    matching_terms.append(mtic)
+            # matching_terms = projects.valid_term_in_project(term, self.project_id)
             for matching_term in matching_terms:
                 if matching_term.collection_id not in collection_terms_mapping:
                     collection_terms_mapping[matching_term.collection_id] = set()
@@ -229,15 +230,20 @@ class DrsGenerator(DrsApplication):
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
-                                   given_mapping_or_bag_of_terms=terms,
-                                   mapping_used=mapping, generated_drs_expression=drs_expression,
-                                   errors=cast(list[GenerationError], errors),
-                                   warnings=cast(list[GenerationWarning], warnings))
+        return DrsGenerationReport(
+            project_id=self.project_id,
+            type=specs.type,
+            given_mapping_or_bag_of_terms=terms,
+            mapping_used=mapping,
+            generated_drs_expression=drs_expression,
+            errors=cast(list[GenerationError], errors),
+            warnings=cast(list[GenerationWarning], warnings),
+        )
 
     @staticmethod
-    def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
-            -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
+    def _resolve_conflicts(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
         warnings: list[GenerationIssue] = list()
         conflicting_collection_ids_list: list[list[str]] = list()
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -247,13 +253,16 @@ class DrsGenerator(DrsApplication):
             conflicting_collection_ids: list[str] = list()
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                 if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
-                        collection_terms_mapping[collection_ids[r_collection_index]]):
+                    collection_terms_mapping[collection_ids[r_collection_index]]
+                ):
                     continue
                 else:
                     not_registered = True
                     for cc_ids in conflicting_collection_ids_list:
-                        if collection_ids[l_collection_index] in cc_ids and \
-                                collection_ids[r_collection_index] in cc_ids:
+                        if (
+                            collection_ids[l_collection_index] in cc_ids
+                            and collection_ids[r_collection_index] in cc_ids
+                        ):
                             not_registered = False
                             break
                     if not_registered:
@@ -287,10 +296,12 @@ class DrsGenerator(DrsApplication):
             # raise errors, remove the faulty collections and their term.
             if collection_ids_with_len_eq_1_list:
                 for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
-                    DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                            collection_ids_to_be_removed)
-                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                                   collection_ids_to_be_removed)
+                    DrsGenerator._remove_ids_from_conflicts(
+                        conflicting_collection_ids_list, collection_ids_to_be_removed
+                    )
+                    DrsGenerator._remove_term_from_other_term_sets(
+                        collection_terms_mapping, collection_ids_to_be_removed
+                    )
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
@@ -307,10 +318,8 @@ class DrsGenerator(DrsApplication):
                     warnings.append(issue)
                 # 3.b Update conflicting collections.
                 if wining_collection_ids:
-                    DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                            wining_collection_ids)
-                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                                   wining_collection_ids)
+                    DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                    DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                 # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                 # as conflicting collections may be resolved.
                 continue
@@ -320,13 +329,15 @@ class DrsGenerator(DrsApplication):
             wining_id_and_term_pairs: list[tuple[str, str]] = list()
             for collection_ids in conflicting_collection_ids_list:
                 for collection_index in range(0, len(collection_ids)):
-                    collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
-                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
-                        .difference(*[collection_terms_mapping[index]  # noqa E127
-                                      for index in collection_set])
+                    collection_set = collection_ids[collection_index + 1 :] + collection_ids[:collection_index]
+                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
+                        *[
+                            collection_terms_mapping[index]  # noqa E127
+                            for index in collection_set
+                        ]
+                    )
                     if len(diff) == 1:
-                        wining_id_and_term_pairs.append((collection_ids[collection_index],
-                                                         _get_first_item(diff)))
+                        wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
             # 4.b Update conflicting collections.
             if wining_id_and_term_pairs:
                 wining_collection_ids = list()
@@ -336,18 +347,17 @@ class DrsGenerator(DrsApplication):
                     collection_terms_mapping[collection_id].add(term)
                     issue = AssignedTerm(collection_id=collection_id, term=term)
                     warnings.append(issue)
-                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                        wining_collection_ids)
-                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                               wining_collection_ids)
+                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                 continue
             else:
                 break  # Stop the loop when no progress is made.
         return collection_terms_mapping, warnings
 
     @staticmethod
-    def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
-            -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
+    def _check_collection_terms_mapping(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         # 1. Looking for collections that share strictly the same term(s).
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -363,8 +373,7 @@ class DrsGenerator(DrsApplication):
                 if l_term_set and (not l_term_set.difference(r_term_set)):
                     not_registered = True
                     for faulty_collections in faulty_collections_list:
-                        if l_collection_id in faulty_collections or \
-                                r_collection_id in faulty_collections:
+                        if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
                             faulty_collections.add(l_collection_id)
                             faulty_collections.add(r_collection_id)
                             not_registered = False
@@ -373,8 +382,9 @@ class DrsGenerator(DrsApplication):
                     faulty_collections_list.append({l_collection_id, r_collection_id})
         for faulty_collections in faulty_collections_list:
             terms = collection_terms_mapping[_get_first_item(faulty_collections)]
-            issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
-                                           terms=_transform_set_and_sort(terms))
+            issue = ConflictingCollections(
+                collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
+            )
             errors.append(issue)
             for collection_id in faulty_collections:
                 del collection_terms_mapping[collection_id]
@@ -386,25 +396,28 @@ class DrsGenerator(DrsApplication):
             if len_term_set == 1:
                 result[collection_id] = _get_first_item(term_set)
             elif len_term_set > 1:
-                other_issue = TooManyTermCollection(collection_id=collection_id,
-                                                    terms=_transform_set_and_sort(term_set))
+                other_issue = TooManyTermCollection(
+                    collection_id=collection_id, terms=_transform_set_and_sort(term_set)
+                )
                 errors.append(other_issue)
             # else: Don't add emptied collection to the result.
         return result, errors
 
     @staticmethod
-    def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
-                                          collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_term_from_other_term_sets(
+        collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
        for collection_id_to_be_removed in collection_ids_to_be_removed:
            # Should only be one term.
            term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
            for collection_id in collection_terms_mapping.keys():
-                if (collection_id not in collection_ids_to_be_removed):
+                if collection_id not in collection_ids_to_be_removed:
                    collection_terms_mapping[collection_id].discard(term_to_be_removed)
 
     @staticmethod
-    def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
-                                   collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_ids_from_conflicts(
+        conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
        for collection_id_to_be_removed in collection_ids_to_be_removed:
            for conflicting_collection_ids in conflicting_collection_ids_list:
                if collection_id_to_be_removed in conflicting_collection_ids:
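The behavioural change in _generate_from_bag_of_terms is that each candidate term is now validated only against the collections referenced by the DRS specification parts, instead of against every collection of the project (the former valid_term_in_project call is kept as a comment). A hypothetical end-to-end use of the public wrapper is sketched below; the project id, the terms and the DrsGenerator constructor argument are assumptions, as the constructor is not shown in this diff:

from esgvoc.apps.drs.generator import DrsGenerator

generator = DrsGenerator("cmip6")  # assumed constructor argument
# Unordered bag of terms; only collections present in the DRS spec parts
# are consulted when matching each term (new in this release).
report = generator.generate_from_bag_of_terms(
    ["CMIP", "IPSL", "historical", "r1i1p1f1", "gr"],
    "directory",  # DrsType | str, per the signature above; value assumed
)
print(report.generated_drs_expression)
print(report.errors, report.warnings)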
esgvoc/apps/jsg/cmip6_template.json ADDED
@@ -0,0 +1,74 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json#",
+  "title": "CMIP6 Extension",
+  "description": "STAC CMIP6 Extension for STAC Items and STAC Collection Summaries.",
+  "type": "object",
+  "required": [
+    "stac_extensions"
+  ],
+  "properties": {
+    "stac_extensions": {
+      "type": "array",
+      "contains": {
+        "const": "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"
+      }
+    }
+  },
+  "oneOf": [
+    {
+      "$comment": "This is the schema for STAC Items.",
+      "type": "object",
+      "required": [
+        "type",
+        "properties"
+      ],
+      "properties": {
+        "type": {
+          "const": "Feature"
+        },
+        "properties": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/require_any"
+            },
+            {
+              "$ref": "#/definitions/fields"
+            }
+          ]
+        }
+      }
+    },
+    {
+      "$comment": "This is the schema for STAC Collections, or more specifically only Collection Summaries in this case. By default, only checks the existence of the properties, but not the schema of the summaries.",
+      "type": "object",
+      "required": [
+        "type",
+        "summaries"
+      ],
+      "properties": {
+        "type": {
+          "const": "Collection"
+        },
+        "summaries": {
+          "$ref": "#/definitions/require_any"
+        }
+      }
+    }
+  ],
+  "definitions": {
+    "require_any": {
+      "$comment": "Please list all fields here so that we can force the existence of one of them in other parts of the schemas."
+    },
+    "fields": {
+      "$comment": "Don't require fields here, do that above in the corresponding schema.",
+      "type": "object",
+      "properties": {
+      },
+      "patternProperties": {
+        "^(?!cmip6:)": {}
+      },
+      "additionalProperties": false
+    }
+  }
+}
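This template leaves definitions/require_any and definitions/fields to be completed, presumably by the new json_schema_generator.py, which injects the cmip6: fields from the controlled vocabularies. A minimal sketch of validating a STAC item against a completed schema with the third-party jsonschema package follows; the file path and the item are illustrative:

import json

from jsonschema import Draft7Validator  # pip install jsonschema

# Hypothetical check of a STAC item against a generated CMIP6 schema.
with open("cmip6_schema.json") as f:  # illustrative path
    schema = json.load(f)

item = {
    "type": "Feature",
    "stac_extensions": ["https://stac-extensions.github.io/cmip6/v1.0.0/schema.json"],
    "properties": {},  # cmip6:* fields would go here
}

# Raises jsonschema.exceptions.ValidationError if the item does not conform.
Draft7Validator(schema).validate(item)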