esgvoc 0.4.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of esgvoc might be problematic. Click here for more details.

Files changed (73)
  1. esgvoc/__init__.py +1 -1
  2. esgvoc/api/data_descriptors/__init__.py +50 -28
  3. esgvoc/api/data_descriptors/activity.py +3 -3
  4. esgvoc/api/data_descriptors/area_label.py +16 -1
  5. esgvoc/api/data_descriptors/branded_suffix.py +20 -0
  6. esgvoc/api/data_descriptors/branded_variable.py +12 -0
  7. esgvoc/api/data_descriptors/consortium.py +14 -13
  8. esgvoc/api/data_descriptors/contact.py +5 -0
  9. esgvoc/api/data_descriptors/conventions.py +6 -0
  10. esgvoc/api/data_descriptors/creation_date.py +5 -0
  11. esgvoc/api/data_descriptors/data_descriptor.py +14 -9
  12. esgvoc/api/data_descriptors/data_specs_version.py +5 -0
  13. esgvoc/api/data_descriptors/date.py +1 -1
  14. esgvoc/api/data_descriptors/directory_date.py +1 -1
  15. esgvoc/api/data_descriptors/experiment.py +13 -11
  16. esgvoc/api/data_descriptors/forcing_index.py +1 -1
  17. esgvoc/api/data_descriptors/frequency.py +3 -3
  18. esgvoc/api/data_descriptors/further_info_url.py +5 -0
  19. esgvoc/api/data_descriptors/grid_label.py +2 -2
  20. esgvoc/api/data_descriptors/horizontal_label.py +15 -1
  21. esgvoc/api/data_descriptors/initialisation_index.py +1 -1
  22. esgvoc/api/data_descriptors/institution.py +8 -5
  23. esgvoc/api/data_descriptors/known_branded_variable.py +23 -0
  24. esgvoc/api/data_descriptors/license.py +3 -3
  25. esgvoc/api/data_descriptors/mip_era.py +1 -1
  26. esgvoc/api/data_descriptors/model_component.py +1 -1
  27. esgvoc/api/data_descriptors/obs_type.py +5 -0
  28. esgvoc/api/data_descriptors/organisation.py +1 -1
  29. esgvoc/api/data_descriptors/physic_index.py +1 -1
  30. esgvoc/api/data_descriptors/product.py +2 -2
  31. esgvoc/api/data_descriptors/publication_status.py +5 -0
  32. esgvoc/api/data_descriptors/realisation_index.py +1 -1
  33. esgvoc/api/data_descriptors/realm.py +1 -1
  34. esgvoc/api/data_descriptors/region.py +5 -0
  35. esgvoc/api/data_descriptors/resolution.py +3 -3
  36. esgvoc/api/data_descriptors/source.py +9 -5
  37. esgvoc/api/data_descriptors/source_type.py +1 -1
  38. esgvoc/api/data_descriptors/table.py +3 -2
  39. esgvoc/api/data_descriptors/temporal_label.py +15 -1
  40. esgvoc/api/data_descriptors/time_range.py +4 -3
  41. esgvoc/api/data_descriptors/title.py +5 -0
  42. esgvoc/api/data_descriptors/tracking_id.py +5 -0
  43. esgvoc/api/data_descriptors/variable.py +25 -12
  44. esgvoc/api/data_descriptors/variant_label.py +3 -3
  45. esgvoc/api/data_descriptors/vertical_label.py +14 -0
  46. esgvoc/api/project_specs.py +117 -2
  47. esgvoc/api/projects.py +242 -279
  48. esgvoc/api/search.py +30 -3
  49. esgvoc/api/universe.py +42 -27
  50. esgvoc/apps/jsg/cmip6_template.json +74 -0
  51. esgvoc/apps/jsg/cmip6plus_template.json +74 -0
  52. esgvoc/apps/jsg/json_schema_generator.py +185 -0
  53. esgvoc/cli/config.py +500 -0
  54. esgvoc/cli/find.py +138 -0
  55. esgvoc/cli/get.py +43 -38
  56. esgvoc/cli/main.py +10 -3
  57. esgvoc/cli/status.py +27 -18
  58. esgvoc/cli/valid.py +10 -15
  59. esgvoc/core/db/models/project.py +11 -11
  60. esgvoc/core/db/models/universe.py +3 -3
  61. esgvoc/core/db/project_ingestion.py +40 -40
  62. esgvoc/core/db/universe_ingestion.py +36 -33
  63. esgvoc/core/logging_handler.py +24 -2
  64. esgvoc/core/repo_fetcher.py +61 -59
  65. esgvoc/core/service/data_merger.py +47 -34
  66. esgvoc/core/service/state.py +107 -83
  67. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/METADATA +7 -20
  68. esgvoc-1.0.0.dist-info/RECORD +95 -0
  69. esgvoc/core/logging.conf +0 -21
  70. esgvoc-0.4.0.dist-info/RECORD +0 -80
  71. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/WHEEL +0 -0
  72. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/entry_points.txt +0 -0
  73. {esgvoc-0.4.0.dist-info → esgvoc-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
esgvoc/cli/get.py CHANGED
@@ -1,35 +1,40 @@
1
+ import logging
2
+ import re
3
+ from typing import Any, List, Optional
1
4
 
2
- from typing import Any
3
- from esgvoc.api.projects import get_all_collections_in_project, get_all_projects, \
4
- get_all_terms_in_collection, get_term_in_project, get_term_in_collection
5
- from esgvoc.api.universe import find_terms_in_data_descriptor, find_terms_in_universe, \
6
- get_all_data_descriptors_in_universe, get_all_terms_in_data_descriptor, get_term_in_data_descriptor, \
7
- get_term_in_universe
8
- from pydantic import BaseModel
9
- from requests import logging
10
- from rich.table import Table
11
5
  import typer
12
- import re
13
- from rich.json import JSON
6
+ from pydantic import BaseModel
14
7
  from rich.console import Console
8
+ from rich.json import JSON
9
+ from rich.table import Table
10
+
11
+ from esgvoc.api.projects import (get_all_collections_in_project,
12
+ get_all_projects, get_all_terms_in_collection,
13
+ get_term_in_collection, get_term_in_project)
14
+ from esgvoc.api.universe import (find_terms_in_data_descriptor,
15
+ find_terms_in_universe,
16
+ get_all_data_descriptors_in_universe,
17
+ get_all_terms_in_data_descriptor,
18
+ get_term_in_data_descriptor,
19
+ get_term_in_universe)
15
20
 
16
21
  app = typer.Typer()
17
22
  console = Console()
18
23
 
19
24
  _LOGGER = logging.getLogger(__name__)
20
25
 
26
+
21
27
  def validate_key_format(key: str):
22
28
  """
23
29
  Validate if the key matches the XXXX:YYYY:ZZZZ format.
24
30
  """
25
- if not re.match(r"^[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_]*:[a-zA-Z0-9\/_.]*$", key):
31
+ if not re.match(r"^[a-zA-Z0-9\/_-]*:[a-zA-Z0-9\/_-]*:[a-zA-Z0-9\/_.-]*$", key):
26
32
  raise typer.BadParameter(f"Invalid key format: {key}. Must be XXXX:YYYY:ZZZZ.")
27
33
  return key.split(":")
28
34
 
29
35
 
30
- def handle_universe(data_descriptor_id:str|None,term_id:str|None, options=None):
31
- _LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")
32
-
36
+ def handle_universe(data_descriptor_id: str | None, term_id: str | None, options=None):
37
+ _LOGGER.debug(f"Handling universe with data_descriptor_id={data_descriptor_id}, term_id={term_id}")
33
38
  if data_descriptor_id and term_id:
34
39
  return get_term_in_data_descriptor(data_descriptor_id, term_id, options)
35
40
  # BaseModel|dict[str: BaseModel]|None:
@@ -38,29 +43,28 @@ def handle_universe(data_descriptor_id:str|None,term_id:str|None, options=None):
38
43
  return get_term_in_universe(term_id, options)
39
44
  # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
40
45
 
41
-
42
46
  elif data_descriptor_id:
43
- return get_all_terms_in_data_descriptor(data_descriptor_id)
47
+ return get_all_terms_in_data_descriptor(data_descriptor_id, options)
44
48
  # dict[str, BaseModel]|None:
45
49
 
46
50
  else:
47
51
  return get_all_data_descriptors_in_universe()
48
52
  # dict[str, dict]:
49
53
 
50
- def handle_project(project_id:str,collection_id:str|None,term_id:str|None,options=None):
54
+
55
+ def handle_project(project_id: str, collection_id: str | None, term_id: str | None, options=None):
51
56
  _LOGGER.debug(f"Handling project {project_id} with Y={collection_id}, Z={term_id}, options = {options}")
52
-
57
+
53
58
  if project_id and collection_id and term_id:
54
59
  return get_term_in_collection(project_id, collection_id, term_id, options)
55
60
  # BaseModel|dict[str: BaseModel]|None:
56
61
 
57
62
  elif term_id:
58
- return get_term_in_project(project_id, term_id,options)
63
+ return get_term_in_project(project_id, term_id, options)
59
64
  # dict[str, BaseModel] | dict[str, dict[str, BaseModel]] | None:
60
65
 
61
-
62
66
  elif collection_id:
63
- return get_all_terms_in_collection(project_id, collection_id)
67
+ return get_all_terms_in_collection(project_id, collection_id, options)
64
68
  # dict[str, BaseModel]|None:
65
69
 
66
70
  else:
@@ -72,12 +76,11 @@ def handle_project(project_id:str,collection_id:str|None,term_id:str|None,option
72
76
  # dict[str, dict]:
73
77
 
74
78
 
75
- def handle_unknown(x:str|None,y:str|None,z:str|None):
79
+ def handle_unknown(x: str | None, y: str | None, z: str | None):
76
80
  print(f"Something wrong in X,Y or Z : X={x}, Y={y}, Z={z}")
77
81
 
78
82
 
79
- def display(data:Any):
80
-
83
+ def display(data: Any):
81
84
  if isinstance(data, BaseModel):
82
85
  # Pydantic Model
83
86
  console.print(JSON.from_data(data.model_dump()))
@@ -96,8 +99,12 @@ def display(data:Any):
96
99
  # Fallback to simple print
97
100
  console.print(data)
98
101
 
102
+
99
103
  @app.command()
100
- def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZZZ format")):
104
+ def get(
105
+ keys: List[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZZZ format"),
106
+ select: Optional[List[str]] = typer.Option(None, "--select", help="keys selected for the result"),
107
+ ):
101
108
  """
102
109
  Retrieve a specific value from the database system.\n
103
110
  This command allows you to fetch a value by specifying the universe/project, data_descriptor/collection,
@@ -113,7 +120,7 @@ def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZ
113
120
  <term>\t\tThe term id within the specified collection.\n
114
121
  \n
115
122
  Example:
116
- To retrieve the value from the "cmip6plus" project, under the "institution_id" column, the term with the identifier "ipsl", you would use: \n
123
+ To retrieve the value from the "cmip6plus" project, under the "institution_id" column, the term with the identifier "ipsl", you would use: \n
117
124
  `get cmip6plus:institution_id:ipsl`\n
118
125
  The default project is the universe CV : the argument would be like `universe:institution:ipsl` or `:institution:ipsl` \n
119
126
  - to get list of available term from universe institution `:institution:` \n
@@ -124,23 +131,21 @@ def get(keys: list[str] = typer.Argument(..., help="List of keys in XXXX:YYYY:ZZ
124
131
  - Use a colon (`:`) to separate the parts of the argument. \n
125
132
  - if more than one argument is given i.e get X:Y:Z A:B:C the 2 results are appended. \n
126
133
  \n
127
- """
134
+ """
128
135
  known_projects = get_all_projects()
129
136
 
130
137
  # Validate and process each key
131
138
  for key in keys:
132
139
  validated_key = validate_key_format(key)
133
140
  _LOGGER.debug(f"Processed key: {validated_key}")
134
- where,what,who = validated_key
135
- what = what if what!="" else None
136
- who = who if who!="" else None
137
- if where == "" or where=="universe":
138
- res = handle_universe(what,who)
141
+ where, what, who = validated_key
142
+ what = what if what != "" else None
143
+ who = who if who != "" else None
144
+ if where == "" or where == "universe":
145
+ res = handle_universe(what, who, select)
139
146
  elif where in known_projects:
140
- res = handle_project(where,what,who,None)
147
+ res = handle_project(where, what, who, select)
141
148
  else:
142
- res = handle_unknown(where,what,who)
143
-
144
- display(res)
145
-
149
+ res = handle_unknown(where, what, who)
146
150
 
151
+ display(res)
esgvoc/cli/main.py CHANGED
@@ -1,9 +1,13 @@
1
1
  import typer
2
+
3
+ from esgvoc.cli.config import app as config_app
4
+ from esgvoc.cli.drs import app as drs_app
5
+ from esgvoc.cli.find import app as find_app
2
6
  from esgvoc.cli.get import app as get_app
7
+ from esgvoc.cli.install import app as install_app
3
8
  from esgvoc.cli.status import app as status_app
4
9
  from esgvoc.cli.valid import app as valid_app
5
- from esgvoc.cli.install import app as install_app
6
- from esgvoc.cli.drs import app as drs_app
10
+
7
11
  app = typer.Typer()
8
12
 
9
13
  # Register the subcommands
@@ -12,10 +16,13 @@ app.add_typer(status_app)
12
16
  app.add_typer(valid_app)
13
17
  app.add_typer(install_app)
14
18
  app.add_typer(drs_app)
19
+ app.add_typer(config_app, name="config")
20
+ app.add_typer(find_app)
21
+
15
22
 
16
23
  def main():
17
24
  app()
18
-
25
+
19
26
 
20
27
  if __name__ == "__main__":
21
28
  main()
esgvoc/cli/status.py CHANGED
@@ -1,38 +1,47 @@
1
- from esgvoc.core import service
2
- from rich.table import Table
3
1
  import typer
4
2
  from rich.console import Console
3
+ from rich.table import Table
4
+
5
+ from esgvoc.core import service
5
6
 
6
7
  app = typer.Typer()
7
8
  console = Console()
8
9
 
9
10
 
10
11
  def display(table):
11
- console = Console(record=True,width=200)
12
+ console = Console(record=True, width=200)
12
13
  console.print(table)
13
14
 
14
15
 
15
-
16
16
  @app.command()
17
17
  def status():
18
18
  """
19
- Command to display status
20
- i.e summary of version of usable ressources (between remote/cached)
21
-
19
+ Command to display status
20
+ i.e summary of version of usable ressources (between remote/cached)
21
+
22
22
  """
23
- assert(service.current_state is not None)
23
+ assert service.current_state is not None
24
24
  service.current_state.get_state_summary()
25
- #display(service.state_service.table())
26
-
25
+ # display(service.state_service.table())
27
26
 
28
27
  table = Table(show_header=False, show_lines=True)
29
28
 
30
- table.add_row("","Remote github repo","Local repository","Cache Database", style = "bright_green")
31
- table.add_row("Universe path",service.current_state.universe.github_repo,service.current_state.universe.local_path,service.current_state.universe.db_path, style = "white")
32
- table.add_row("Version",service.current_state.universe.github_version,service.current_state.universe.local_version,service.current_state.universe.db_version, style="bright_blue")
33
- for proj_name,proj in service.current_state.projects.items():
34
- table.add_row(f"{proj_name} path",proj.github_repo,proj.local_path,proj.db_path, style="white")
35
- table.add_row("Version",proj.github_version,proj.local_version,proj.db_version,style ="bright_blue")
29
+ table.add_row("", "Remote github repo", "Local repository", "Cache Database", style="bright_green")
30
+ table.add_row(
31
+ "Universe path",
32
+ service.current_state.universe.github_repo,
33
+ service.current_state.universe.local_path,
34
+ service.current_state.universe.db_path,
35
+ style="white",
36
+ )
37
+ table.add_row(
38
+ "Version",
39
+ service.current_state.universe.github_version,
40
+ service.current_state.universe.local_version,
41
+ service.current_state.universe.db_version,
42
+ style="bright_blue",
43
+ )
44
+ for proj_name, proj in service.current_state.projects.items():
45
+ table.add_row(f"{proj_name} path", proj.github_repo, proj.local_path, proj.db_path, style="white")
46
+ table.add_row("Version", proj.github_version, proj.local_version, proj.db_version, style="bright_blue")
36
47
  display(table)
37
-
38
-
esgvoc/cli/valid.py CHANGED
@@ -1,26 +1,21 @@
1
1
 
2
+ import re
2
3
  from typing import List
3
- from esgvoc.api.projects import (
4
- valid_term,
5
- valid_term_in_collection,
6
- valid_term_in_project,
7
- valid_term_in_all_projects
8
- )
9
- from requests import logging
10
- from rich.table import Table
4
+
11
5
  import typer
12
- import re
13
6
  from rich.console import Console
7
+ from rich.table import Table
8
+
9
+ from esgvoc.api.projects import valid_term, valid_term_in_all_projects, valid_term_in_collection, valid_term_in_project
14
10
 
15
11
  app = typer.Typer()
16
12
  console = Console()
17
13
 
18
- _LOGGER = logging.getLogger(__name__)
19
14
 
20
15
  @app.command()
21
16
  def valid(
22
17
  strings_targets: List[str] = typer.Argument(
23
- ...,
18
+ ...,
24
19
  help=(
25
20
  "Pairs of strings to validate against a key in the form '<StringToValidate> <Project:Collection:Term>'.\n"
26
21
  "Multiple pairs can be provided. The key '<Project:Collection:Term>' consists of three parts:\n"
@@ -51,7 +46,7 @@ def valid(
51
46
  \t\t- A string to validate.\n
52
47
  \t\t- A key in the form '<Project:Collection:Term>'.\n
53
48
  Usage :\n
54
- \tValid one:\n
49
+ \tValid one:\n
55
50
  \tesgvocab valid IPSL cmip6plus:institution_id:ipsl\n
56
51
  \tesgvocab valid IPSL cmip6plus:institution_id:\n
57
52
  \tesgvocab valid IPSL cmip6plus::\n
@@ -67,7 +62,7 @@ def valid(
67
62
  \tesgvocab valid IPSL :: IPS :: \n
68
63
  \t\tresult will be [True, False]\n
69
64
  \n
70
- \tesgvocab valid --verbose IPS :: IPSL ::\n
65
+ \tesgvocab valid --verbose IPS :: IPSL ::\n
71
66
  \tresult will be \n
72
67
  \t\t┏━━━━━━━━┳━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n
73
68
  \t\t┃ String ┃ Key ┃ Result ┃ Errors ┃\n
@@ -83,7 +78,7 @@ def valid(
83
78
 
84
79
  # Combine string and target into pairs
85
80
  pairs = [strings_targets[i] + " " + strings_targets[i + 1] for i in range(0, len(strings_targets), 2)]
86
-
81
+
87
82
  # Validate each string against each target
88
83
  for validation in pairs:
89
84
  match = re.match(r"(.+)\s+([^:]*):([^:]*):([^:]*)", validation)
@@ -109,7 +104,7 @@ def valid(
109
104
  except Exception as e:
110
105
  validation_result=False
111
106
  exception_message = repr(e)
112
-
107
+
113
108
  # Handle validation result
114
109
 
115
110
  if validation_result:
esgvoc/core/db/models/project.py CHANGED
@@ -10,18 +10,18 @@ import esgvoc.core.db.connection as db
10
10
  from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
11
11
  from esgvoc.core.exceptions import EsgvocDbError
12
12
 
13
- _LOGGER = logging.getLogger("project_db_creation")
13
+ _LOGGER = logging.getLogger(__name__)
14
14
 
15
15
 
16
16
  class Project(SQLModel, PkMixin, IdMixin, table=True):
17
17
  __tablename__ = "projects"
18
18
  specs: dict = Field(sa_column=sa.Column(JSON))
19
19
  git_hash: str
20
- collections: list["Collection"] = Relationship(back_populates="project")
20
+ collections: list["PCollection"] = Relationship(back_populates="project")
21
21
 
22
22
 
23
- class Collection(SQLModel, PkMixin, IdMixin, table=True):
24
- __tablename__ = "collections"
23
+ class PCollection(SQLModel, PkMixin, IdMixin, table=True):
24
+ __tablename__ = "pcollections"
25
25
  data_descriptor_id: str = Field(index=True)
26
26
  context: dict = Field(sa_column=sa.Column(JSON))
27
27
  project_pk: int | None = Field(default=None, foreign_key="projects.pk")
@@ -44,8 +44,8 @@ class PTerm(SQLModel, PkMixin, IdMixin, table=True):
44
44
  __tablename__ = "pterms"
45
45
  specs: dict = Field(sa_column=sa.Column(JSON))
46
46
  kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
47
- collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
48
- collection: Collection = Relationship(back_populates="terms")
47
+ collection_pk: int | None = Field(default=None, foreign_key="pcollections.pk")
48
+ collection: PCollection = Relationship(back_populates="terms")
49
49
  __table_args__ = (sa.Index("drs_name_index", specs.sa_column["drs_name"]), ) # type: ignore
50
50
 
51
51
 
@@ -55,7 +55,7 @@ class PTermFTS5(SQLModel, PkMixin, IdMixin, table=True):
55
55
  __tablename__ = "pterms_fts5"
56
56
  specs: dict = Field(sa_column=sa.Column(JSON))
57
57
  kind: TermKind = Field(sa_column=Column(sa.Enum(TermKind)))
58
- collection_pk: int | None = Field(default=None, foreign_key="collections.pk")
58
+ collection_pk: int | None = Field(default=None, foreign_key="pcollections.pk")
59
59
 
60
60
 
61
61
  def project_create_db(db_file_path: Path):
@@ -68,7 +68,7 @@ def project_create_db(db_file_path: Path):
68
68
  try:
69
69
  # Do not include pterms_fts5 table: it is build from a raw SQL query.
70
70
  tables_to_be_created = [SQLModel.metadata.tables['projects'],
71
- SQLModel.metadata.tables['collections'],
71
+ SQLModel.metadata.tables['pcollections'],
72
72
  SQLModel.metadata.tables['pterms']]
73
73
  SQLModel.metadata.create_all(connection.get_engine(), tables=tables_to_be_created)
74
74
  except Exception as e:
@@ -77,8 +77,8 @@ def project_create_db(db_file_path: Path):
77
77
  raise EsgvocDbError(msg) from e
78
78
  try:
79
79
  with connection.create_session() as session:
80
- sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pterms_fts5 USING ' + \
81
- 'fts5(pk, id, specs, kind, collection_pk, content=pterms, content_rowid=pk);'
80
+ sql_query = "CREATE VIRTUAL TABLE IF NOT EXISTS pterms_fts5 USING " + \
81
+ "fts5(pk, id, specs, kind, collection_pk, content=pterms, content_rowid=pk, prefix=3);"
82
82
  session.exec(text(sql_query)) # type: ignore
83
83
  session.commit()
84
84
  except Exception as e:
@@ -89,7 +89,7 @@ def project_create_db(db_file_path: Path):
89
89
  with connection.create_session() as session:
90
90
  sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS pcollections_fts5 USING ' + \
91
91
  'fts5(pk, id, data_descriptor_id, context, project_pk, ' + \
92
- 'term_kind, content=collections, content_rowid=pk);'
92
+ 'term_kind, content=pcollections, content_rowid=pk, prefix=3);'
93
93
  session.exec(text(sql_query)) # type: ignore
94
94
  session.commit()
95
95
  except Exception as e:
esgvoc/core/db/models/universe.py CHANGED
@@ -10,7 +10,7 @@ import esgvoc.core.db.connection as db
10
10
  from esgvoc.core.db.models.mixins import IdMixin, PkMixin, TermKind
11
11
  from esgvoc.core.exceptions import EsgvocDbError
12
12
 
13
- _LOGGER = logging.getLogger("universe_db_creation")
13
+ _LOGGER = logging.getLogger(__name__)
14
14
 
15
15
 
16
16
  class Universe(SQLModel, PkMixin, table=True):
@@ -74,7 +74,7 @@ def universe_create_db(db_file_path: Path) -> None:
74
74
  try:
75
75
  with connection.create_session() as session:
76
76
  sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS uterms_fts5 USING ' + \
77
- 'fts5(pk, id, specs, kind, data_descriptor_pk, content=uterms, content_rowid=pk);'
77
+ 'fts5(pk, id, specs, kind, data_descriptor_pk, content=uterms, content_rowid=pk, prefix=3);'
78
78
  session.exec(text(sql_query)) # type: ignore
79
79
  session.commit()
80
80
  except Exception as e:
@@ -85,7 +85,7 @@ def universe_create_db(db_file_path: Path) -> None:
85
85
  with connection.create_session() as session:
86
86
  sql_query = 'CREATE VIRTUAL TABLE IF NOT EXISTS udata_descriptors_fts5 USING ' + \
87
87
  'fts5(pk, id, universe_pk, context, ' + \
88
- 'term_kind, content=udata_descriptors, content_rowid=pk);'
88
+ 'term_kind, content=udata_descriptors, content_rowid=pk, prefix=3);'
89
89
  session.exec(text(sql_query)) # type: ignore
90
90
  session.commit()
91
91
  except Exception as e:
esgvoc/core/db/project_ingestion.py CHANGED
@@ -10,11 +10,11 @@ import esgvoc.core.service as service
10
10
  from esgvoc.core.data_handler import JsonLdResource
11
11
  from esgvoc.core.db.connection import DBConnection, read_json_file
12
12
  from esgvoc.core.db.models.mixins import TermKind
13
- from esgvoc.core.db.models.project import Collection, Project, PTerm
13
+ from esgvoc.core.db.models.project import PCollection, Project, PTerm
14
14
  from esgvoc.core.exceptions import EsgvocDbError
15
15
  from esgvoc.core.service.data_merger import DataMerger
16
16
 
17
- _LOGGER = logging.getLogger("project_ingestion")
17
+ _LOGGER = logging.getLogger(__name__)
18
18
 
19
19
 
20
20
  def infer_term_kind(json_specs: dict) -> TermKind:
@@ -34,52 +34,53 @@ def ingest_metadata_project(connection: DBConnection, git_hash):
34
34
 
35
35
 
36
36
  def get_data_descriptor_id_from_context(collection_context: dict) -> str:
37
- data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY]\
38
- [esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY] # noqa E211
37
+ data_descriptor_url = collection_context[esgvoc.core.constants.CONTEXT_JSON_KEY][
38
+ esgvoc.core.constants.DATA_DESCRIPTOR_JSON_KEY
39
+ ] # noqa E211
39
40
  return Path(data_descriptor_url).name
40
41
 
41
42
 
42
- def instantiate_project_term(universe_term_json_specs: dict,
43
- project_term_json_specs_update: dict,
44
- pydantic_class: type[BaseModel]) -> dict:
43
+ def instantiate_project_term(
44
+ universe_term_json_specs: dict, project_term_json_specs_update: dict, pydantic_class: type[BaseModel]
45
+ ) -> dict:
45
46
  term_from_universe = pydantic_class(**universe_term_json_specs)
46
- updated_term = term_from_universe.model_copy(
47
- update=project_term_json_specs_update, deep=True
48
- )
47
+ updated_term = term_from_universe.model_copy(update=project_term_json_specs_update, deep=True)
49
48
  return updated_term.model_dump()
50
49
 
51
50
 
52
- def ingest_collection(collection_dir_path: Path,
53
- project: Project,
54
- project_db_session) -> None:
51
+ def ingest_collection(collection_dir_path: Path, project: Project, project_db_session) -> None:
55
52
  collection_id = collection_dir_path.name
56
53
  collection_context_file_path = collection_dir_path.joinpath(esgvoc.core.constants.CONTEXT_FILENAME)
57
54
  try:
58
55
  collection_context = read_json_file(collection_context_file_path)
59
56
  data_descriptor_id = get_data_descriptor_id_from_context(collection_context)
60
57
  except Exception as e:
61
- msg = f'unable to read project context file {collection_context_file_path}'
58
+ msg = f"unable to read project context file {collection_context_file_path}"
62
59
  _LOGGER.fatal(msg)
63
60
  raise EsgvocDbError(msg) from e
64
61
  # [KEEP]
65
- collection = Collection(
62
+ collection = PCollection(
66
63
  id=collection_id,
67
64
  context=collection_context,
68
65
  project=project,
69
66
  data_descriptor_id=data_descriptor_id,
70
- term_kind="") # We ll know it only when we ll add a term
71
- # (hypothesis all term have the same kind in a collection) # noqa E116
67
+ term_kind="",
68
+ ) # We ll know it only when we ll add a term
69
+ # (hypothesis all term have the same kind in a collection) # noqa E116
72
70
  term_kind_collection = None
73
71
 
74
72
  for term_file_path in collection_dir_path.iterdir():
75
73
  _LOGGER.debug(f"found term path : {term_file_path}")
76
74
  if term_file_path.is_file() and term_file_path.suffix == ".json":
77
75
  try:
78
- locally_avail = {"https://espri-mod.github.io/mip-cmor-tables":
79
- service.current_state.universe.local_path}
80
- json_specs = DataMerger(data=JsonLdResource(uri=str(term_file_path)),
81
- # locally_available={"https://espri-mod.github.io/mip-cmor-tables":".cache/repos/WCRP-universe"}).merge_linked_json()[-1]
82
- locally_available=locally_avail).merge_linked_json()[-1]
76
+ locally_avail = {
77
+ "https://espri-mod.github.io/mip-cmor-tables": service.current_state.universe.local_path
78
+ }
79
+ json_specs = DataMerger(
80
+ data=JsonLdResource(uri=str(term_file_path)),
81
+ # locally_available={"https://espri-mod.github.io/mip-cmor-tables":".cache/repos/WCRP-universe"}).merge_linked_json()[-1]
82
+ locally_available=locally_avail,
83
+ ).merge_linked_json()[-1]
83
84
  term_kind = infer_term_kind(json_specs)
84
85
  term_id = json_specs["id"]
85
86
 
@@ -87,7 +88,7 @@ def ingest_collection(collection_dir_path: Path,
87
88
  term_kind_collection = term_kind
88
89
 
89
90
  except Exception as e:
90
- _LOGGER.warning(f'Unable to read term {term_file_path}. Skip.\n{str(e)}')
91
+ _LOGGER.warning(f"Unable to read term {term_file_path}. Skip.\n{str(e)}")
91
92
  continue
92
93
  try:
93
94
  term = PTerm(
@@ -108,14 +109,11 @@ def ingest_collection(collection_dir_path: Path,
108
109
  project_db_session.add(collection)
109
110
 
110
111
 
111
- def ingest_project(project_dir_path: Path,
112
- project_db_file_path: Path,
113
- git_hash: str
114
- ):
112
+ def ingest_project(project_dir_path: Path, project_db_file_path: Path, git_hash: str):
115
113
  try:
116
114
  project_connection = db.DBConnection(project_db_file_path)
117
115
  except Exception as e:
118
- msg = f'unable to read project SQLite file at {project_db_file_path}'
116
+ msg = f"unable to read project SQLite file at {project_db_file_path}"
119
117
  _LOGGER.fatal(msg)
120
118
  raise EsgvocDbError(msg) from e
121
119
 
@@ -125,7 +123,7 @@ def ingest_project(project_dir_path: Path,
125
123
  project_json_specs = read_json_file(project_specs_file_path)
126
124
  project_id = project_json_specs[esgvoc.core.constants.PROJECT_ID_JSON_KEY]
127
125
  except Exception as e:
128
- msg = f'unable to read project specs file {project_specs_file_path}'
126
+ msg = f"unable to read project specs file {project_specs_file_path}"
129
127
  _LOGGER.fatal(msg)
130
128
  raise EsgvocDbError(msg) from e
131
129
 
@@ -137,11 +135,9 @@ def ingest_project(project_dir_path: Path,
137
135
  if collection_dir_path.is_dir() and (collection_dir_path / "000_context.jsonld").exists():
138
136
  _LOGGER.debug(f"found collection dir : {collection_dir_path}")
139
137
  try:
140
- ingest_collection(collection_dir_path,
141
- project,
142
- project_db_session)
138
+ ingest_collection(collection_dir_path, project, project_db_session)
143
139
  except Exception as e:
144
- msg = f'unexpected error while ingesting collection {collection_dir_path}'
140
+ msg = f"unexpected error while ingesting collection {collection_dir_path}"
145
141
  _LOGGER.fatal(msg)
146
142
  raise EsgvocDbError(msg) from e
147
143
  project_db_session.commit()
@@ -149,21 +145,25 @@ def ingest_project(project_dir_path: Path,
149
145
  # Well, the following instructions are not data duplication. It is more building an index.
150
146
  # Read: https://sqlite.org/fts5.html
151
147
  try:
152
- sql_query = 'INSERT INTO pterms_fts5(pk, id, specs, kind, collection_pk) ' + \
153
- 'SELECT pk, id, specs, kind, collection_pk FROM pterms;' # noqa: S608
148
+ sql_query = (
149
+ "INSERT INTO pterms_fts5(pk, id, specs, kind, collection_pk) " # noqa: S608
150
+ + "SELECT pk, id, specs, kind, collection_pk FROM pterms;"
151
+ )
154
152
  project_db_session.exec(text(sql_query)) # type: ignore
155
153
  except Exception as e:
156
- msg = f'unable to insert rows into pterms_fts5 table for {project_db_file_path}'
154
+ msg = f"unable to insert rows into pterms_fts5 table for {project_db_file_path}"
157
155
  _LOGGER.fatal(msg)
158
156
  raise EsgvocDbError(msg) from e
159
157
  project_db_session.commit()
160
158
  try:
161
- sql_query = 'INSERT INTO pcollections_fts5(pk, id, data_descriptor_id, context, ' + \
162
- 'project_pk, term_kind) SELECT pk, id, data_descriptor_id, context, ' + \
163
- 'project_pk, term_kind FROM collections;' # noqa: S608
159
+ sql_query = (
160
+ "INSERT INTO pcollections_fts5(pk, id, data_descriptor_id, context, " # noqa: S608
161
+ + "project_pk, term_kind) SELECT pk, id, data_descriptor_id, context, "
162
+ + "project_pk, term_kind FROM pcollections;"
163
+ )
164
164
  project_db_session.exec(text(sql_query)) # type: ignore
165
165
  except Exception as e:
166
- msg = f'unable to insert rows into pcollections_fts5 table for {project_db_file_path}'
166
+ msg = f"unable to insert rows into pcollections_fts5 table for {project_db_file_path}"
167
167
  _LOGGER.fatal(msg)
168
168
  raise EsgvocDbError(msg) from e
169
169
  project_db_session.commit()