esgvoc 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of esgvoc might be problematic.
Files changed (43)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/__init__.py +0 -6
  3. esgvoc/api/data_descriptors/__init__.py +8 -0
  4. esgvoc/api/data_descriptors/archive.py +5 -0
  5. esgvoc/api/data_descriptors/citation_url.py +5 -0
  6. esgvoc/api/data_descriptors/experiment.py +2 -2
  7. esgvoc/api/data_descriptors/known_branded_variable.py +58 -5
  8. esgvoc/api/data_descriptors/member_id.py +9 -0
  9. esgvoc/api/data_descriptors/regex.py +5 -0
  10. esgvoc/api/data_descriptors/vertical_label.py +2 -2
  11. esgvoc/api/project_specs.py +48 -130
  12. esgvoc/api/projects.py +185 -66
  13. esgvoc/apps/drs/generator.py +103 -85
  14. esgvoc/apps/drs/validator.py +22 -38
  15. esgvoc/apps/jsg/json_schema_generator.py +255 -130
  16. esgvoc/apps/jsg/templates/template.jinja +249 -0
  17. esgvoc/apps/test_cv/README.md +214 -0
  18. esgvoc/apps/test_cv/cv_tester.py +1368 -0
  19. esgvoc/apps/test_cv/example_usage.py +216 -0
  20. esgvoc/apps/vr/__init__.py +12 -0
  21. esgvoc/apps/vr/build_variable_registry.py +71 -0
  22. esgvoc/apps/vr/example_usage.py +60 -0
  23. esgvoc/apps/vr/vr_app.py +333 -0
  24. esgvoc/cli/config.py +671 -86
  25. esgvoc/cli/drs.py +39 -21
  26. esgvoc/cli/main.py +2 -0
  27. esgvoc/cli/test_cv.py +257 -0
  28. esgvoc/core/constants.py +10 -7
  29. esgvoc/core/data_handler.py +24 -22
  30. esgvoc/core/db/connection.py +7 -0
  31. esgvoc/core/db/project_ingestion.py +34 -9
  32. esgvoc/core/db/universe_ingestion.py +1 -2
  33. esgvoc/core/service/configuration/setting.py +192 -21
  34. esgvoc/core/service/data_merger.py +1 -1
  35. esgvoc/core/service/state.py +18 -2
  36. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/METADATA +2 -3
  37. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/RECORD +41 -30
  38. esgvoc/apps/jsg/cmip6_template.json +0 -74
  39. esgvoc/apps/jsg/cmip6plus_template.json +0 -74
  40. /esgvoc/apps/{py.typed → test_cv/__init__.py} +0 -0
  41. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/WHEEL +0 -0
  42. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/entry_points.txt +0 -0
  43. {esgvoc-1.0.0.dist-info → esgvoc-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/api/projects.py CHANGED
@@ -1,5 +1,6 @@
+import itertools
 import re
-from typing import Iterable, Sequence
+from typing import Iterable, Sequence, cast

 from sqlalchemy import text
 from sqlmodel import Session, and_, col, select
@@ -48,22 +49,36 @@ def _get_project_session_with_exception(project_id: str) -> Session:
     raise EsgvocNotFoundError(f"unable to find project '{project_id}'")


-def _resolve_term(composite_term_part: dict, universe_session: Session, project_session: Session) -> UTerm | PTerm:
-    # First find the term in the universe than in the current project
-    term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
-    term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
-    uterm = universe._get_term_in_data_descriptor(
-        data_descriptor_id=term_type, term_id=term_id, session=universe_session
-    )
-    if uterm:
-        return uterm
-    else:
-        pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
-        if pterm:
-            return pterm
+def _resolve_composite_term_part(composite_term_part: dict,
+                                 universe_session: Session,
+                                 project_session: Session) -> UTerm | PTerm | Sequence[UTerm | PTerm]:
+    if constants.TERM_ID_JSON_KEY in composite_term_part:
+        # First find the term in the universe than in the current project
+        term_id = composite_term_part[constants.TERM_ID_JSON_KEY]
+        term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
+        uterm = universe._get_term_in_data_descriptor(data_descriptor_id=term_type,
+                                                      term_id=term_id, session=universe_session)
+        if uterm:
+            return uterm
+        else:
+            pterm = _get_term_in_collection(collection_id=term_type, term_id=term_id, session=project_session)
+            if pterm:
+                return pterm
+            else:
+                msg = f"unable to find the term '{term_id}' in '{term_type}'"
+                raise EsgvocNotFoundError(msg)
     else:
-        msg = f"unable to find the term '{term_id}' in '{term_type}'"
-        raise EsgvocNotFoundError(msg)
+        term_type = composite_term_part[constants.TERM_TYPE_JSON_KEY]
+        data_descriptor = universe._get_data_descriptor_in_universe(term_type, universe_session)
+        if data_descriptor is not None:
+            return data_descriptor.terms
+        else:
+            collection = _get_collection_in_project(term_type, project_session)
+            if collection is not None:
+                return collection.terms
+            else:
+                msg = f"unable to find the terms of '{term_type}'"
+                raise EsgvocNotFoundError(msg)


 def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
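
Aside: the new _resolve_composite_term_part widens the old _resolve_term contract: given a term id it resolves a single term, and given only a type it now returns every term of that data descriptor or collection. A minimal sketch of this dispatch, assuming a hypothetical in-memory registry in place of the universe and project sessions (editorial illustration, not code from the package):

    # Hypothetical stand-in for the universe/project lookups.
    UNIVERSE = {"time_range": {"daily": "day", "monthly": "mon"}}

    def resolve_part(part: dict) -> str | list[str]:
        terms = UNIVERSE[part["type"]]
        if "id" in part:  # explicit id: resolve a single term
            if part["id"] in terms:
                return terms[part["id"]]
            raise KeyError(f"unable to find the term '{part['id']}' in '{part['type']}'")
        return list(terms.values())  # no id: all terms of the type

    print(resolve_part({"type": "time_range", "id": "daily"}))  # day
    print(resolve_part({"type": "time_range"}))                 # ['day', 'mon']
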
@@ -72,42 +87,82 @@ def _get_composite_term_separator_parts(term: UTerm | PTerm) -> tuple[str, list]:
     return separator, parts


-# TODO: support optionality of parts of composite.
-# It is backtrack possible for more than one missing parts.
 def _valid_value_composite_term_with_separator(
     value: str, term: UTerm | PTerm, universe_session: Session, project_session: Session
 ) -> list[UniverseTermError | ProjectTermError]:
-    result = list()
     separator, parts = _get_composite_term_separator_parts(term)
-    if separator in value:
-        splits = value.split(separator)
-        if len(splits) == len(parts):
-            for index in range(0, len(splits)):
-                given_value = splits[index]
-                if "id" not in parts[index].keys():
-                    terms = universe.get_all_terms_in_data_descriptor(parts[index]["type"], None)
-                    parts[index]["id"] = [term.id for term in terms]
-                if type(parts[index]["id"]) is str:
-                    parts[index]["id"] = [parts[index]["id"]]
-
-                errors_list = list()
-                for id in parts[index]["id"]:
-                    part_parts = dict(parts[index])
-                    part_parts["id"] = id
-                    resolved_term = _resolve_term(part_parts, universe_session, project_session)
-                    errors = _valid_value(given_value, resolved_term, universe_session, project_session)
-                    if len(errors) == 0:
-                        errors_list = errors
-                        break
-                    else:
-                        errors_list.extend(errors)
-                else:
-                    result.append(_create_term_error(value, term))
-        else:
-            result.append(_create_term_error(value, term))
-    else:
-        result.append(_create_term_error(value, term))
-    return result
+    required_indices = {i for i, p in enumerate(parts) if p.get("is_required", False)}
+
+    splits = value.split(separator)
+    nb_splits = len(splits)
+    nb_parts = len(parts)
+
+    if nb_splits > nb_parts:
+        return [_create_term_error(value, term)]
+
+    # Generate all possible assignments of split values into parts.
+    # Only keep those that include all required parts.
+    all_positions = [i for i in range(nb_parts)]
+    valid_combinations = [
+        comb for comb in itertools.combinations(all_positions, nb_splits) if required_indices.issubset(comb)
+    ]
+
+    for positions in valid_combinations:
+        candidate = [None] * nb_parts
+        for idx, pos in enumerate(positions):
+            candidate[pos] = splits[idx]
+
+        # Separator structure validation:
+        # - No leading separator if the first part is None
+        # - No trailing separator if the last part is None
+        # - No double separators where two adjacent optional parts are missing
+        if candidate[0] is None and value.startswith(separator):
+            continue
+        if candidate[-1] is None and value.endswith(separator):
+            continue
+        if any(
+            candidate[i] is None and candidate[i + 1] is None and separator * 2 in value for i in range(nb_parts - 1)
+        ):
+            continue  # invalid double separator between two missing parts
+
+        # Validate each filled part value
+        all_valid = True
+        for i, given_value in enumerate(candidate):
+            if given_value is None:
+                if parts[i].get("is_required", False):
+                    all_valid = False
+                    break
+                continue  # optional and missing part is allowed
+
+            part = parts[i]
+
+            # Resolve term ID list if not present
+            if "id" not in part:
+                terms = universe.get_all_terms_in_data_descriptor(part["type"], None)
+                part["id"] = [term.id for term in terms]
+            if isinstance(part["id"], str):
+                part["id"] = [part["id"]]
+
+            # Try all possible term IDs to find a valid match
+            valid_for_this_part = False
+            for id in part["id"]:
+                part_copy = dict(part)
+                part_copy["id"] = id
+                resolved_term = _resolve_composite_term_part(part_copy, universe_session, project_session)
+                # resolved_term can't be a list of terms here.
+                resolved_term = cast(UTerm | PTerm, resolved_term)
+                errors = _valid_value(given_value, resolved_term, universe_session, project_session)
+                if not errors:
+                    valid_for_this_part = True
+                    break
+            if not valid_for_this_part:
+                all_valid = False
+                break
+
+        if all_valid:
+            return []  # At least one valid combination found
+
+    return [_create_term_error(value, term)]  # No valid combination found


 def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, project_session: Session) -> str:
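
Aside: the rewritten validator replaces the old rigid one-split-per-part check with a search over all placements of the split values onto the parts, which is what makes optional parts possible. A minimal, self-contained sketch of the enumeration step with hypothetical parts (editorial illustration, not the package's data model):

    import itertools

    # Hypothetical composite: three parts, the middle one optional.
    parts = [{"name": "table", "is_required": True},
             {"name": "region", "is_required": False},
             {"name": "member", "is_required": True}]
    required = {i for i, p in enumerate(parts) if p["is_required"]}

    splits = "day_r1i1p1".split("_")  # two values for three parts
    # Choose which part positions receive the split values; required
    # positions must be covered, optional ones may stay empty (None).
    for positions in itertools.combinations(range(len(parts)), len(splits)):
        if not required.issubset(positions):
            continue
        candidate = [None] * len(parts)
        for value, pos in zip(splits, positions):
            candidate[pos] = value
        print(candidate)  # ['day', None, 'r1i1p1']

Each surviving candidate is then checked part by part, exactly as the new code above does.
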
@@ -123,8 +178,13 @@ def _transform_to_pattern(term: UTerm | PTerm, universe_session: Session, project_session: Session) -> str:
             separator, parts = _get_composite_term_separator_parts(term)
             result = ""
             for part in parts:
-                resolved_term = _resolve_term(part, universe_session, project_session)
-                pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
+                resolved_term = _resolve_composite_term_part(part, universe_session, project_session)
+                if isinstance(resolved_term, Sequence):
+                    pattern = ""
+                    for r_term in resolved_term:
+                        pattern += _transform_to_pattern(r_term, universe_session, project_session)
+                else:
+                    pattern = _transform_to_pattern(resolved_term, universe_session, project_session)
                 result = f"{result}{pattern}{separator}"
             result = result.rstrip(separator)
         case _:
@@ -452,7 +512,52 @@ def get_all_terms_in_collection(
 def _get_all_collections_in_project(session: Session) -> list[PCollection]:
     project = session.get(Project, constants.SQLITE_FIRST_PK)
     # Project can't be missing if session exists.
-    return project.collections  # type: ignore
+    try:
+        return project.collections  # type: ignore
+    except Exception as e:
+        # Enhanced error context for collection retrieval failures
+        import logging
+        logger = logging.getLogger(__name__)
+        logger.error(f"Failed to retrieve collections for project '{project.id}': {str(e)}")
+
+        # Use raw SQL to inspect collections without Pydantic validation
+        from sqlalchemy import text
+        try:
+            # Query raw data to identify problematic collections
+            raw_query = text("""
+                SELECT id, term_kind, data_descriptor_id
+                FROM pcollections
+                WHERE project_pk = :project_pk
+            """)
+            result = session.execute(raw_query, {"project_pk": project.pk})
+
+            problematic_collections = []
+
+            for row in result:
+                collection_id, term_kind_value, data_descriptor_id = row
+
+                # Only empty string is invalid - indicates ingestion couldn't determine termkind
+                if term_kind_value == '' or term_kind_value is None:
+                    problematic_collections.append((collection_id, term_kind_value, data_descriptor_id))
+                    msg = f"Collection '{collection_id}' has empty term_kind (data_descriptor: " + \
+                          f"{data_descriptor_id}) - CV ingestion failed to determine termkind"
+                    logger.error(msg)
+
+            if problematic_collections:
+                error_details = []
+                for col_id, _, data_desc in problematic_collections:
+                    error_details.append(f"  • Collection '{col_id}' (data_descriptor: {data_desc}): EMPTY termkind")
+
+                error_msg = (
+                    f"Found {len(problematic_collections)} collections with empty term_kind:\n" +
+                    "\n".join(error_details)
+                )
+                raise ValueError(error_msg) from e
+
+        except Exception as inner_e:
+            logger.error(f"Failed to analyze problematic collections using raw SQL: {inner_e}")
+
+        raise e


 def get_all_collections_in_project(project_id: str) -> list[str]:
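
Aside: the pattern introduced here is: when ORM access fails validation, re-read the same rows with raw SQL so the offending records can be named precisely. A standalone sqlite3 sketch of the idea, with a hypothetical table and a stand-in for the failing ORM load (editorial illustration):

    import sqlite3

    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE pcollections (id TEXT, term_kind TEXT, data_descriptor_id TEXT)")
    con.execute("INSERT INTO pcollections VALUES ('variable_id', '', 'variable')")  # bad row

    def load_collections():
        raise ValueError("validation failed")  # stands in for the ORM/Pydantic failure

    try:
        load_collections()
    except Exception:
        # Raw rows stay readable, so the empty term_kind can be reported precisely.
        rows = con.execute("SELECT id, term_kind, data_descriptor_id FROM pcollections")
        bad = [(cid, dd) for cid, kind, dd in rows if not kind]
        print(f"collections with empty term_kind: {bad}")  # [('variable_id', 'variable')]
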
@@ -469,10 +574,24 @@ def get_all_collections_in_project(project_id: str) -> list[str]:
     """
    result = list()
     if connection := _get_project_connection(project_id):
-        with connection.create_session() as session:
-            collections = _get_all_collections_in_project(session)
-            for collection in collections:
-                result.append(collection.id)
+        try:
+            with connection.create_session() as session:
+                collections = _get_all_collections_in_project(session)
+                for collection in collections:
+                    result.append(collection.id)
+        except Exception as e:
+            # Enhanced error context for project collection retrieval
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.error(f"Failed to get collections for project '{project_id}': {str(e)}")
+
+            # Re-raise with enhanced context
+            raise ValueError(
+                f"Failed to retrieve collections for project '{project_id}'. "
+                f"This may be due to invalid termkind values in the database. "
+                f"Check the project database for collections with empty or invalid termkind values. "
+                f"Original error: {str(e)}"
+            ) from e
     return result

@@ -1113,16 +1232,16 @@ def find_items_in_project(
         collection_column = col(PCollectionFTS5.id)  # TODO: use specs when implemented!
         term_column = col(PTermFTS5.specs)  # type: ignore
         collection_where_condition = collection_column.match(processed_expression)
-        collection_statement = select(PCollectionFTS5.id,
-                                      text("'collection' AS TYPE"),
-                                      text(f"'{project_id}' AS TYPE"),
-                                      text('rank')).where(collection_where_condition)
+        collection_statement = select(
+            PCollectionFTS5.id, text("'collection' AS TYPE"), text(f"'{project_id}' AS TYPE"), text("rank")
+        ).where(collection_where_condition)
         term_where_condition = term_column.match(processed_expression)
-        term_statement = select(PTermFTS5.id,
-                                text("'term' AS TYPE"),
-                                PCollection.id,
-                                text('rank')).join(PCollection) \
-                                .where(term_where_condition)
-        result = execute_find_item_statements(session, processed_expression, collection_statement,
-                                              term_statement, limit, offset)
+        term_statement = (
+            select(PTermFTS5.id, text("'term' AS TYPE"), PCollection.id, text("rank"))
+            .join(PCollection)
+            .where(term_where_condition)
+        )
+        result = execute_find_item_statements(
+            session, processed_expression, collection_statement, term_statement, limit, offset
+        )
         return result
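
Aside: both statements ride on SQLite FTS5: match() compiles to the FTS5 MATCH operator and rank is FTS5's built-in relevance column. A minimal sqlite3 illustration of that query shape, using a hypothetical table rather than the package's schema (editorial illustration):

    import sqlite3

    con = sqlite3.connect(":memory:")
    con.execute("CREATE VIRTUAL TABLE pterms USING fts5(id, specs)")
    con.executemany("INSERT INTO pterms VALUES (?, ?)",
                    [("tas", "near-surface air temperature"),
                     ("pr", "precipitation flux")])
    # MATCH plus ORDER BY rank is what the select() statements compile down to.
    for row in con.execute("SELECT id, rank FROM pterms WHERE pterms MATCH ? ORDER BY rank",
                           ("temperature",)):
        print(row[0])  # tas
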
esgvoc/apps/drs/generator.py CHANGED
@@ -1,7 +1,8 @@
 from typing import Any, Iterable, Mapping, cast

 import esgvoc.api.projects as projects
-from esgvoc.api.project_specs import DrsCollection, DrsConstant, DrsPartKind, DrsSpecification, DrsType
+from esgvoc.api.project_specs import DrsSpecification, DrsType
+from esgvoc.api.search import MatchingTerm
 from esgvoc.apps.drs.report import (
     AssignedTerm,
     ConflictingCollections,
@@ -92,8 +93,7 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_mapping(mapping, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-            self._get_full_file_name_extension()  # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report

     def generate_file_name_from_bag_of_terms(self, terms: Iterable[str]) -> DrsGenerationReport:
@@ -108,12 +108,10 @@ class DrsGenerator(DrsApplication):
         :rtype: DrsGeneratorReport
         """
         report = self._generate_from_bag_of_terms(terms, self.file_name_specs)
-        report.generated_drs_expression = report.generated_drs_expression + \
-            self._get_full_file_name_extension()  # noqa E127
+        report.generated_drs_expression = report.generated_drs_expression + self._get_full_file_name_extension()  # noqa E127
         return report

-    def generate_from_mapping(self, mapping: Mapping[str, str],
-                              drs_type: DrsType | str) -> DrsGenerationReport:
+    def generate_from_mapping(self, mapping: Mapping[str, str], drs_type: DrsType | str) -> DrsGenerationReport:
         """
         Generate a DRS expression from a mapping of collection ids and terms.

@@ -134,8 +132,7 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")

-    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) \
-            -> DrsGenerationReport:  # noqa E127
+    def generate_from_bag_of_terms(self, terms: Iterable[str], drs_type: DrsType | str) -> DrsGenerationReport:  # noqa E127
         """
         Generate a DRS expression from an unordered bag of terms.

@@ -156,67 +153,78 @@ class DrsGenerator(DrsApplication):
             case _:
                 raise EsgvocDbError(f"unsupported drs type '{drs_type}'")

-    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) \
-            -> DrsGenerationReport:  # noqa E127
+    def _generate_from_mapping(self, mapping: Mapping[str, str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         drs_expression, errors, warnings = self.__generate_from_mapping(mapping, specs, True)
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
-                                   given_mapping_or_bag_of_terms=mapping,
-                                   mapping_used=mapping,
-                                   generated_drs_expression=drs_expression,
-                                   errors=cast(list[GenerationError], errors),
-                                   warnings=cast(list[GenerationWarning], warnings))
-
-    def __generate_from_mapping(self, mapping: Mapping[str, str],
-                                specs: DrsSpecification,
-                                has_to_valid_terms: bool) \
-            -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
+        return DrsGenerationReport(
+            project_id=self.project_id,
+            type=specs.type,
+            given_mapping_or_bag_of_terms=mapping,
+            mapping_used=mapping,
+            generated_drs_expression=drs_expression,
+            errors=cast(list[GenerationError], errors),
+            warnings=cast(list[GenerationWarning], warnings),
+        )
+
+    def __generate_from_mapping(
+        self, mapping: Mapping[str, str], specs: DrsSpecification, has_to_valid_terms: bool
+    ) -> tuple[str, list[GenerationIssue], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         warnings: list[GenerationIssue] = list()
         drs_expression = ""
         part_position: int = 0
         for part in specs.parts:
             part_position += 1
-            if part.kind == DrsPartKind.COLLECTION:
-                collection_part = cast(DrsCollection, part)
-                collection_id = collection_part.collection_id
-                if collection_id in mapping:
-                    part_value = mapping[collection_id]
-                    if has_to_valid_terms:
+            collection_id = part.source_collection
+            if collection_id in mapping:
+                part_value = mapping[collection_id]
+                if has_to_valid_terms:
+                    if part.source_collection_term is None:
                         matching_terms = projects.valid_term_in_collection(part_value,
                                                                            self.project_id,
                                                                            collection_id)
-                        if not matching_terms:
-                            issue = InvalidTerm(term=part_value,
-                                                term_position=part_position,
-                                                collection_id_or_constant_value=collection_id)
-                            errors.append(issue)
-                            part_value = DrsGenerationReport.INVALID_TAG
-                else:
-                    other_issue = MissingTerm(collection_id=collection_id,
-                                              collection_position=part_position)
-                    if collection_part.is_required:
-                        errors.append(other_issue)
-                        part_value = DrsGenerationReport.MISSING_TAG
                     else:
-                        warnings.append(other_issue)
-                        continue  # The for loop.
+                        matching_terms = projects.valid_term(
+                            part_value,
+                            self.project_id,
+                            collection_id,
+                            part.source_collection_term).validated
+                    if not matching_terms:
+                        issue = InvalidTerm(term=part_value,
+                                            term_position=part_position,
+                                            collection_id_or_constant_value=collection_id)
+                        errors.append(issue)
+                        part_value = DrsGenerationReport.INVALID_TAG
             else:
-                constant_part = cast(DrsConstant, part)
-                part_value = constant_part.value
+                other_issue = MissingTerm(collection_id=collection_id, collection_position=part_position)
+                if part.is_required:
+                    errors.append(other_issue)
+                    part_value = DrsGenerationReport.MISSING_TAG
+                else:
+                    warnings.append(other_issue)
+                    continue  # The for loop.

             drs_expression += part_value + specs.separator

-        drs_expression = drs_expression[0:len(drs_expression)-len(specs.separator)]
+        drs_expression = drs_expression[0: len(drs_expression) - len(specs.separator)]
         return drs_expression, errors, warnings

-    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) \
-            -> DrsGenerationReport:  # noqa E127
+    def _generate_from_bag_of_terms(self, terms: Iterable[str], specs: DrsSpecification) -> DrsGenerationReport:  # noqa E127
         collection_terms_mapping: dict[str, set[str]] = dict()
         for term in terms:
-            matching_terms = projects.valid_term_in_project(term, self.project_id)
+            matching_terms: list[MatchingTerm] = list()
+            for part in specs.parts:
+                if part.source_collection_term is None:
+                    matching_terms.extend(projects.valid_term_in_collection(term, self.project_id,
+                                                                            part.source_collection))
+                else:
+                    if projects.valid_term(term, self.project_id, part.source_collection,
+                                           part.source_collection_term).validated:
+                        matching_terms.append(MatchingTerm(project_id=self.project_id,
+                                                           collection_id=part.source_collection,
+                                                           term_id=part.source_collection_term))
             for matching_term in matching_terms:
                 if matching_term.collection_id not in collection_terms_mapping:
                     collection_terms_mapping[matching_term.collection_id] = set()
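
Aside: before conflict resolution, each given term is bucketed under every collection that validates it. A small self-contained sketch of the bucketing, with a hypothetical membership test in place of projects.valid_term_in_collection / projects.valid_term (editorial illustration):

    # Hypothetical collections and membership test.
    collections = {"variable_id": {"tas", "pr"}, "table_id": {"day", "Amon"}}

    def matching_collections(term: str) -> list[str]:
        return [cid for cid, terms in collections.items() if term in terms]

    mapping: dict[str, set[str]] = {}
    for term in ["tas", "day"]:
        for collection_id in matching_collections(term):
            mapping.setdefault(collection_id, set()).add(term)
    print(mapping)  # {'variable_id': {'tas'}, 'table_id': {'day'}}
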
@@ -229,15 +237,18 @@ class DrsGenerator(DrsApplication):
         if self.pedantic:
             errors.extend(warnings)
             warnings.clear()
-        return DrsGenerationReport(project_id=self.project_id, type=specs.type,
+        return DrsGenerationReport(project_id=self.project_id,
+                                   type=specs.type,
                                    given_mapping_or_bag_of_terms=terms,
-                                   mapping_used=mapping, generated_drs_expression=drs_expression,
+                                   mapping_used=mapping,
+                                   generated_drs_expression=drs_expression,
                                    errors=cast(list[GenerationError], errors),
                                    warnings=cast(list[GenerationWarning], warnings))

     @staticmethod
-    def _resolve_conflicts(collection_terms_mapping: dict[str, set[str]]) \
-            -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
+    def _resolve_conflicts(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, set[str]], list[GenerationIssue]]:  # noqa E127
         warnings: list[GenerationIssue] = list()
         conflicting_collection_ids_list: list[list[str]] = list()
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -247,13 +258,16 @@ class DrsGenerator(DrsApplication):
             conflicting_collection_ids: list[str] = list()
             for r_collection_index in range(l_collection_index + 1, len_collection_ids):
                 if collection_terms_mapping[collection_ids[l_collection_index]].isdisjoint(
-                        collection_terms_mapping[collection_ids[r_collection_index]]):
+                    collection_terms_mapping[collection_ids[r_collection_index]]
+                ):
                     continue
                 else:
                     not_registered = True
                     for cc_ids in conflicting_collection_ids_list:
-                        if collection_ids[l_collection_index] in cc_ids and \
-                           collection_ids[r_collection_index] in cc_ids:
+                        if (
+                            collection_ids[l_collection_index] in cc_ids
+                            and collection_ids[r_collection_index] in cc_ids
+                        ):
                             not_registered = False
                             break
                     if not_registered:
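
Aside: two collections conflict exactly when their candidate term sets intersect, which is what the isdisjoint test above decides. A worked example with made-up ids (editorial illustration):

    # 'day' validates in both table_id and frequency, so they form a conflict group.
    mapping = {"table_id": {"day"}, "frequency": {"day", "mon"}, "variable_id": {"tas"}}

    conflicts = []
    ids = list(mapping)
    for i, left in enumerate(ids):
        for right in ids[i + 1:]:
            if not mapping[left].isdisjoint(mapping[right]):
                conflicts.append([left, right])
    print(conflicts)  # [['table_id', 'frequency']]
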
@@ -287,10 +301,12 @@ class DrsGenerator(DrsApplication):
        # raise errors, remove the faulty collections and their term.
        if collection_ids_with_len_eq_1_list:
            for collection_ids_to_be_removed in collection_ids_with_len_eq_1_list:
-                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                        collection_ids_to_be_removed)
-                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                               collection_ids_to_be_removed)
+                DrsGenerator._remove_ids_from_conflicts(
+                    conflicting_collection_ids_list, collection_ids_to_be_removed
+                )
+                DrsGenerator._remove_term_from_other_term_sets(
+                    collection_terms_mapping, collection_ids_to_be_removed
+                )
            # Every time conflicting_collection_ids_list is modified, we must restart the loop,
            # as conflicting collections may be resolved.
            continue
@@ -307,10 +323,8 @@ class DrsGenerator(DrsApplication):
                warnings.append(issue)
            # 3.b Update conflicting collections.
            if wining_collection_ids:
-                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                        wining_collection_ids)
-                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                               wining_collection_ids)
+                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                # Every time conflicting_collection_ids_list is modified, we must restart the loop,
                # as conflicting collections may be resolved.
                continue
@@ -321,12 +335,14 @@ class DrsGenerator(DrsApplication):
            for collection_ids in conflicting_collection_ids_list:
                for collection_index in range(0, len(collection_ids)):
                    collection_set = collection_ids[collection_index + 1:] + collection_ids[:collection_index]
-                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]]\
-                        .difference(*[collection_terms_mapping[index]  # noqa E127
-                                      for index in collection_set])
+                    diff: set[str] = collection_terms_mapping[collection_ids[collection_index]].difference(
+                        *[
+                            collection_terms_mapping[index]  # noqa E127
+                            for index in collection_set
+                        ]
+                    )
                    if len(diff) == 1:
-                        wining_id_and_term_pairs.append((collection_ids[collection_index],
-                                                         _get_first_item(diff)))
+                        wining_id_and_term_pairs.append((collection_ids[collection_index], _get_first_item(diff)))
            # 4.b Update conflicting collections.
            if wining_id_and_term_pairs:
                wining_collection_ids = list()
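
Aside: step 4 assigns a conflicting collection its term when subtracting every other conflicting set leaves exactly one candidate. The core set algebra as a standalone example (editorial illustration):

    # 'frequency' keeps 'mon' once the shared 'day' is discounted; 'table_id' keeps nothing.
    mapping = {"table_id": {"day"}, "frequency": {"day", "mon"}}

    for cid in mapping:
        others = [mapping[o] for o in mapping if o != cid]
        diff = mapping[cid].difference(*others)
        if len(diff) == 1:
            print(cid, "->", diff.pop())  # frequency -> mon
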
@@ -336,18 +352,17 @@ class DrsGenerator(DrsApplication):
                    collection_terms_mapping[collection_id].add(term)
                    issue = AssignedTerm(collection_id=collection_id, term=term)
                    warnings.append(issue)
-                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list,
-                                                        wining_collection_ids)
-                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping,
-                                                               wining_collection_ids)
+                DrsGenerator._remove_ids_from_conflicts(conflicting_collection_ids_list, wining_collection_ids)
+                DrsGenerator._remove_term_from_other_term_sets(collection_terms_mapping, wining_collection_ids)
                continue
            else:
                break  # Stop the loop when no progress is made.
        return collection_terms_mapping, warnings

     @staticmethod
-    def _check_collection_terms_mapping(collection_terms_mapping: dict[str, set[str]]) \
-            -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
+    def _check_collection_terms_mapping(
+        collection_terms_mapping: dict[str, set[str]],
+    ) -> tuple[dict[str, str], list[GenerationIssue]]:  # noqa E127
         errors: list[GenerationIssue] = list()
         # 1. Looking for collections that share strictly the same term(s).
         collection_ids: list[str] = list(collection_terms_mapping.keys())
@@ -363,8 +378,7 @@ class DrsGenerator(DrsApplication):
                if l_term_set and (not l_term_set.difference(r_term_set)):
                    not_registered = True
                    for faulty_collections in faulty_collections_list:
-                        if l_collection_id in faulty_collections or \
-                           r_collection_id in faulty_collections:
+                        if l_collection_id in faulty_collections or r_collection_id in faulty_collections:
                            faulty_collections.add(l_collection_id)
                            faulty_collections.add(r_collection_id)
                            not_registered = False
@@ -373,8 +387,9 @@ class DrsGenerator(DrsApplication):
                        faulty_collections_list.append({l_collection_id, r_collection_id})
        for faulty_collections in faulty_collections_list:
            terms = collection_terms_mapping[_get_first_item(faulty_collections)]
-            issue = ConflictingCollections(collection_ids=_transform_set_and_sort(faulty_collections),
-                                           terms=_transform_set_and_sort(terms))
+            issue = ConflictingCollections(
+                collection_ids=_transform_set_and_sort(faulty_collections), terms=_transform_set_and_sort(terms)
+            )
            errors.append(issue)
            for collection_id in faulty_collections:
                del collection_terms_mapping[collection_id]
@@ -386,25 +401,28 @@ class DrsGenerator(DrsApplication):
            if len_term_set == 1:
                result[collection_id] = _get_first_item(term_set)
            elif len_term_set > 1:
-                other_issue = TooManyTermCollection(collection_id=collection_id,
-                                                    terms=_transform_set_and_sort(term_set))
+                other_issue = TooManyTermCollection(
+                    collection_id=collection_id, terms=_transform_set_and_sort(term_set)
+                )
                errors.append(other_issue)
            # else: Don't add emptied collection to the result.
        return result, errors

     @staticmethod
-    def _remove_term_from_other_term_sets(collection_terms_mapping: dict[str, set[str]],
-                                          collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_term_from_other_term_sets(
+        collection_terms_mapping: dict[str, set[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
        for collection_id_to_be_removed in collection_ids_to_be_removed:
            # Should only be one term.
            term_to_be_removed: str = _get_first_item(collection_terms_mapping[collection_id_to_be_removed])
            for collection_id in collection_terms_mapping.keys():
-                if (collection_id not in collection_ids_to_be_removed):
+                if collection_id not in collection_ids_to_be_removed:
                    collection_terms_mapping[collection_id].discard(term_to_be_removed)

     @staticmethod
-    def _remove_ids_from_conflicts(conflicting_collection_ids_list: list[list[str]],
-                                   collection_ids_to_be_removed: list[str]) -> None:
+    def _remove_ids_from_conflicts(
+        conflicting_collection_ids_list: list[list[str]], collection_ids_to_be_removed: list[str]
+    ) -> None:
        for collection_id_to_be_removed in collection_ids_to_be_removed:
            for conflicting_collection_ids in conflicting_collection_ids_list:
                if collection_id_to_be_removed in conflicting_collection_ids: