ghga-transpiler 1.4.0__tar.gz → 2.0.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/PKG-INFO +4 -3
  2. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/pyproject.toml +5 -4
  3. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/__init__.py +7 -0
  4. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/cli.py +2 -2
  5. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/core.py +18 -9
  6. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/transformations.py +22 -1
  7. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler.egg-info/PKG-INFO +3 -2
  8. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler.egg-info/requires.txt +2 -1
  9. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/tests/test_process_workbook.py +5 -3
  10. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/LICENSE +0 -0
  11. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/README.md +0 -0
  12. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/setup.cfg +0 -0
  13. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/__main__.py +0 -0
  14. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/config/__init__.py +0 -0
  15. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/config/config.py +0 -0
  16. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/config/exceptions.py +0 -0
  17. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/configs/__init__.py +0 -0
  18. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler/io.py +0 -0
  19. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler.egg-info/SOURCES.txt +0 -0
  20. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler.egg-info/dependency_links.txt +0 -0
  21. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler.egg-info/entry_points.txt +0 -0
  22. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/src/ghga_transpiler.egg-info/top_level.txt +0 -0
  23. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/tests/test_convert_workbook.py +0 -0
  24. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/tests/test_create_config.py +0 -0
  25. {ghga-transpiler-1.4.0 → ghga_transpiler-2.0.0}/tests/test_io.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
- Name: ghga-transpiler
3
- Version: 1.4.0
2
+ Name: ghga_transpiler
3
+ Version: 2.0.0
4
4
  Summary: GHGA-Transpiler - excel to JSON converter
5
5
  Author-email: "German Human Genome Phenome Archive (GHGA)" <contact@ghga.de>
6
6
  License: Apache 2.0
@@ -19,9 +19,10 @@ Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
20
  Requires-Dist: typer~=0.9.0
21
21
  Requires-Dist: openpyxl==3.*,>=3.1.2
22
- Requires-Dist: defusedxml
22
+ Requires-Dist: defusedxml==0.*,>=0.7
23
23
  Requires-Dist: pydantic<3,>=2
24
24
  Requires-Dist: PyYAML~=6.0
25
+ Requires-Dist: semver==3.*
25
26
 
26
27
 
27
28
  [![tests](https://github.com/ghga-de/ghga-transpiler/actions/workflows/tests.yaml/badge.svg)](https://github.com/ghga-de/ghga-transpiler/actions/workflows/unit_and_int_tests.yaml)
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  # please adapt to package name
7
- name = "ghga-transpiler"
8
- version = "1.4.0"
7
+ name = "ghga_transpiler"
8
+ version = "2.0.0"
9
9
  description = "GHGA-Transpiler - excel to JSON converter"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -27,9 +27,10 @@ classifiers = [
27
27
  dependencies = [
28
28
  "typer ~= 0.9.0",
29
29
  "openpyxl >= 3.1.2, == 3.*",
30
- "defusedxml", # whichever version is compatible with openpyxl
30
+ "defusedxml >= 0.7, == 0.*",
31
31
  "pydantic >=2, <3",
32
- "PyYAML ~= 6.0"
32
+ "PyYAML ~= 6.0",
33
+ "semver == 3.*"
33
34
  ]
34
35
 
35
36
  [project.urls]
@@ -17,4 +17,11 @@
17
17
 
18
18
  from importlib.metadata import version
19
19
 
20
+ from openpyxl.xml import DEFUSEDXML
21
+
20
22
  __version__ = version(__package__)
23
+
24
+ if not DEFUSEDXML:
25
+ raise RuntimeError(
26
+ "The 'defusedxml' package must be present to safely run ghga-transpiler."
27
+ )
@@ -22,7 +22,7 @@ import typer
22
22
 
23
23
  from . import __version__, io
24
24
  from .config.exceptions import UnknownVersionError
25
- from .core import convert_workbook
25
+ from .core import InvalidSematicVersion, convert_workbook
26
26
 
27
27
  cli = typer.Typer()
28
28
 
@@ -66,7 +66,7 @@ def transpile(
66
66
  """
67
67
  try:
68
68
  ghga_workbook = io.read_workbook(spread_sheet)
69
- except (SyntaxError, UnknownVersionError) as exc:
69
+ except (SyntaxError, UnknownVersionError, InvalidSematicVersion) as exc:
70
70
  sys.exit(f"Unable to parse input file '{spread_sheet}': {exc}")
71
71
 
72
72
  converted = convert_workbook(ghga_workbook)
@@ -15,16 +15,17 @@
15
15
  #
16
16
 
17
17
  """This module contains functionalities for processing excel sheets into json object."""
18
- import re
19
18
  from importlib import resources
20
19
  from typing import Callable, Optional, Union
21
20
 
21
+ import semver
22
22
  from openpyxl import Workbook
23
23
 
24
24
  from . import config
25
25
 
26
- # pylint: disable=line-too-long
27
- SEMVER_REGEX = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
26
+
27
+ class InvalidSematicVersion(Exception):
28
+ """Raised when a version string is invalid."""
28
29
 
29
30
 
30
31
  class GHGAWorkbook:
@@ -33,20 +34,28 @@ class GHGAWorkbook:
33
34
  def __init__(self, workbook: Workbook, configs_package: resources.Package):
34
35
  """Create a new GHGAWorkbook object from an XLSX workbook"""
35
36
  self.workbook = workbook
36
- self.version = GHGAWorkbook._get_version(workbook)
37
- self.config = config.load_config(self.version, configs_package)
37
+ self.wb_version = GHGAWorkbook._get_version(workbook)
38
+ self.config = config.load_config(self.major_minor_version, configs_package)
38
39
 
39
40
  @staticmethod
40
41
  def _get_version(workbook):
41
42
  """Function to get workbook version from the worksheet _properties"""
42
43
  if "__properties" in workbook.sheetnames:
43
- version = str(workbook["__properties"].cell(1, 1).value)
44
- if re.fullmatch(SEMVER_REGEX, version):
45
- return version
44
+ try:
45
+ return semver.Version.parse(workbook["__properties"].cell(1, 1).value)
46
+ except ValueError:
47
+ raise InvalidSematicVersion(
48
+ "Unable to extract metadata model version from the provided workbook (not a valid semantic version)."
49
+ ) from None
46
50
  raise SyntaxError(
47
- "Unable to extract metadata version from the provided workbook."
51
+ "Unable to extract metadata model version from the provided workbook (missing)."
48
52
  )
49
53
 
54
+ @property
55
+ def major_minor_version(self):
56
+ """Returns only major and minor version numbers"""
57
+ return f"{self.wb_version.major}.{self.wb_version.minor}"
58
+
50
59
 
51
60
  def get_worksheet_rows(
52
61
  worksheet,
@@ -38,7 +38,28 @@ def to_attributes() -> Callable:
38
38
  return dict(zip(("key", "value"), splitted))
39
39
 
40
40
  def split_mult(value: str) -> list[dict]:
41
- """Function to convert string to attributes"""
41
+ """Converts string to attributes"""
42
42
  return [split_one(elem) for elem in split_by_semicolon(value)]
43
43
 
44
44
  return split_mult
45
+
46
+
47
+ def snake_case(cv: str) -> str:
48
+ """Converts format of a string to SNAKE_CASE"""
49
+ return cv.replace(" ", "_").upper()
50
+
51
+
52
+ def to_snake_case() -> Callable:
53
+ """Returns a function that converts a string to SNAKE_CASE"""
54
+ return snake_case
55
+
56
+
57
+ def snake_case_list(value: str) -> list[str]:
58
+ """Combines the functions to split_by_semicolon and convert_to_snake_case"""
59
+ list_to_convert = split_by_semicolon(value)
60
+ return [snake_case(elem) for elem in list_to_convert]
61
+
62
+
63
+ def to_snake_case_list() -> Callable:
64
+ """Returns a function that converts a semicolon separated string into a list of snake-cased strings"""
65
+ return snake_case_list
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ghga-transpiler
3
- Version: 1.4.0
3
+ Version: 2.0.0
4
4
  Summary: GHGA-Transpiler - excel to JSON converter
5
5
  Author-email: "German Human Genome Phenome Archive (GHGA)" <contact@ghga.de>
6
6
  License: Apache 2.0
@@ -19,9 +19,10 @@ Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
20
  Requires-Dist: typer~=0.9.0
21
21
  Requires-Dist: openpyxl==3.*,>=3.1.2
22
- Requires-Dist: defusedxml
22
+ Requires-Dist: defusedxml==0.*,>=0.7
23
23
  Requires-Dist: pydantic<3,>=2
24
24
  Requires-Dist: PyYAML~=6.0
25
+ Requires-Dist: semver==3.*
25
26
 
26
27
 
27
28
  [![tests](https://github.com/ghga-de/ghga-transpiler/actions/workflows/tests.yaml/badge.svg)](https://github.com/ghga-de/ghga-transpiler/actions/workflows/unit_and_int_tests.yaml)
@@ -1,5 +1,6 @@
1
1
  typer~=0.9.0
2
2
  openpyxl==3.*,>=3.1.2
3
- defusedxml
3
+ defusedxml==0.*,>=0.7
4
4
  pydantic<3,>=2
5
5
  PyYAML~=6.0
6
+ semver==3.*
@@ -16,8 +16,9 @@
16
16
  """Unit tests for core functions"""
17
17
 
18
18
  import pytest
19
+ import semver
19
20
 
20
- from ghga_transpiler.core import GHGAWorkbook
21
+ from ghga_transpiler.core import GHGAWorkbook, InvalidSematicVersion
21
22
 
22
23
  from .fixtures.utils import create_workbook
23
24
 
@@ -30,7 +31,7 @@ def test_extract_good_version() -> None:
30
31
  value = workbook["__properties"].cell(row=1, column=1, value="10.3.1-rc2").value
31
32
  # pylint: disable=protected-access
32
33
  version = GHGAWorkbook._get_version(workbook)
33
- assert version == value
34
+ assert version == semver.Version.parse(str(value))
34
35
 
35
36
 
36
37
  def test_extract_bad_version() -> None:
@@ -39,6 +40,7 @@ def test_extract_bad_version() -> None:
39
40
  """
40
41
  workbook = create_workbook("__properties")
41
42
  workbook["__properties"].cell(row=1, column=1, value="20.10.3.1")
42
- with pytest.raises(SyntaxError):
43
+
44
+ with pytest.raises(InvalidSematicVersion):
43
45
  # pylint: disable=protected-access
44
46
  GHGAWorkbook._get_version(workbook)
File without changes