ghga-transpiler 2.3.1__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ghga_transpiler/cli.py +64 -36
- ghga_transpiler/config.py +136 -0
- ghga_transpiler/{config/exceptions.py → exceptions.py} +14 -2
- ghga_transpiler/metasheet_parser.py +125 -0
- ghga_transpiler/models.py +98 -0
- ghga_transpiler/transformations.py +1 -1
- ghga_transpiler/transpile.py +53 -0
- ghga_transpiler/{io.py → transpiler_io.py} +20 -21
- ghga_transpiler/workbook_parser.py +177 -0
- {ghga_transpiler-2.3.1.dist-info → ghga_transpiler-3.0.0rc1.dist-info}/METADATA +5 -6
- ghga_transpiler-3.0.0rc1.dist-info/RECORD +17 -0
- {ghga_transpiler-2.3.1.dist-info → ghga_transpiler-3.0.0rc1.dist-info}/WHEEL +1 -1
- ghga_transpiler/config/__init__.py +0 -20
- ghga_transpiler/config/config.py +0 -106
- ghga_transpiler/configs/0.10.yaml +0 -135
- ghga_transpiler/configs/1.0.yaml +0 -135
- ghga_transpiler/configs/1.1.yaml +0 -135
- ghga_transpiler/configs/2.0.yaml +0 -170
- ghga_transpiler/configs/2.1.yaml +0 -172
- ghga_transpiler/configs/__init__.py +0 -16
- ghga_transpiler/core.py +0 -155
- ghga_transpiler-2.3.1.dist-info/RECORD +0 -21
- {ghga_transpiler-2.3.1.dist-info → ghga_transpiler-3.0.0rc1.dist-info}/entry_points.txt +0 -0
- {ghga_transpiler-2.3.1.dist-info → ghga_transpiler-3.0.0rc1.dist-info}/licenses/LICENSE +0 -0
- {ghga_transpiler-2.3.1.dist-info → ghga_transpiler-3.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# Copyright 2021 - 2025 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
|
|
2
|
+
# for the German Human Genome-Phenome Archive (GHGA)
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
"Module containing logic to parse a GHGA workbook"
|
|
17
|
+
|
|
18
|
+
from openpyxl import Workbook
|
|
19
|
+
from openpyxl.worksheet.worksheet import Worksheet
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
21
|
+
|
|
22
|
+
from .config import WorkbookConfig, WorksheetSettings
|
|
23
|
+
from .models import GHGAWorkbook, GHGAWorksheet
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class WorksheetParser(BaseModel):
    """Group worksheet parser functions.

    Reads the header row and the data rows of one worksheet, as delimited by
    ``config.settings``, and turns every non-empty data row into a dictionary
    keyed by column name.
    """

    # Per-worksheet configuration. The methods below read
    # ``config.settings.header_row``, ``start_row``, ``start_column`` and
    # ``end_column`` and call ``config.get_transformations()``.
    config: WorksheetSettings

    def _header(self, worksheet: Worksheet) -> list:
        """Return a list of column names of a worksheet.

        The names are the cell values found in the configured header row,
        restricted to the configured column range.
        """
        settings = self.config.settings
        # Positional arguments of iter_rows are min_row, max_row, min_col,
        # max_col; passing header_row twice selects exactly that one row.
        return [
            cell.value
            for row in worksheet.iter_rows(
                settings.header_row,
                settings.header_row,
                settings.start_column,
                settings.end_column,
            )
            for cell in row
        ]

    def _rows(self, worksheet: Worksheet) -> list:
        """Create a list of non-empty rows of a worksheet.

        Rows are read as value tuples from ``start_row`` down to the last row
        of the worksheet; a row is dropped only when every cell in it is None.
        """
        settings = self.config.settings
        return [
            row
            for row in worksheet.iter_rows(
                settings.start_row,
                worksheet.max_row,
                settings.start_column,
                settings.end_column,
                values_only=True,
            )
            if not all(cell is None for cell in row)
        ]

    def _content(self, worksheet: Worksheet) -> list[dict]:
        """Compute and return the content of the worksheet, rows as worksheet row values and
        column names as keys.

        Cells holding None or an empty string are left out of the row
        dictionary. ``zip(..., strict=True)`` raises ValueError when a row and
        the header differ in length.
        """
        rows = self._rows(worksheet)
        if not rows:
            # Nothing to pair with the header, so the header is not read.
            return []
        # The header is identical for every row: read it once instead of
        # re-iterating the header cells for each data row.
        header = self._header(worksheet)
        return [
            {
                key: value
                for key, value in zip(header, row, strict=True)
                if value is not None and value != ""
            }
            for row in rows
        ]

    def _transformed_content(self, worksheet: Worksheet) -> list:
        """Processes each row of the provided worksheet, applying transformations to
        specific fields as defined in the configuration and returns the worksheet content.

        A value whose column name has an entry in the configured
        transformations is replaced by the result of calling that entry with
        the value; all other values are passed through unchanged.
        """
        # The lookup does not depend on the row or the cell, so it is done once
        # up front rather than once per cell. A falsy result (None or empty)
        # means "no transformations".
        transformations = self.config.get_transformations() or {}
        transformed_data = []
        for row in self._content(worksheet):
            transformed_data.append(
                {
                    key: transformations[key](value)
                    if key in transformations
                    else value
                    for key, value in row.items()
                }
            )
        return transformed_data
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class GHGAWorksheetParser(WorksheetParser):
    """Extend WorksheetParser with GHGA worksheet specific parsers."""

    def parse(self, worksheet: Worksheet):
        """Render a worksheet into GHGAWorksheet model.

        The parsed rows are nested under the configured worksheet name and
        handed to ``GHGAWorksheet.model_validate``.
        """
        payload = {"worksheet": {self.config.settings.name: self._parse(worksheet)}}
        return GHGAWorksheet.model_validate(payload)

    def _parse(self, worksheet: Worksheet) -> dict[str, dict]:
        """Parse a worksheet row by row into a dictionary of row-primary-keys as keys and
        a dictionary of content and relations as the values.

        A row that lacks the primary key column raises KeyError; when two rows
        share a primary key the later row replaces the earlier one.
        """
        parsed: dict[str, dict] = {}
        for row in self._transformed_content(worksheet):
            primary_key = row[self.config.settings.primary_key]
            parsed[primary_key] = {
                "content": self._relation_free_content(row),
                "relations": self._relations(row),
            }
        return parsed

    def _relations(self, row: dict) -> dict:
        """Get relations to a dictionary that contains relation name as key and the
        resource that is in the relation as the value.

        Only relations whose name occurs as a key of ``row`` are included.
        """
        found = {}
        for relation in self.config.get_relations():
            if relation.name not in row:
                continue
            found[relation.name] = {
                "targetClass": relation.target_class,
                "targetResources": row[relation.name],
            }
        return found

    def _relation_free_content(self, row: dict) -> dict:
        """Clean up the content data from the relation, i.e., remove the key value pairs
        belonging to a relation from the row content.

        The primary key column is removed as well.
        """
        skipped = set(self._relations(row))
        skipped.add(self.config.settings.primary_key)
        return {key: value for key, value in row.items() if key not in skipped}
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class GHGAWorkbookParser(BaseModel):
    """Parser class for converting a workbook into a GHGAWorkbook."""

    # Presumably required so that pydantic accepts the openpyxl ``Workbook``
    # annotation below, which is not a pydantic type - confirm.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # ``parse`` looks up ``config.worksheets[name]`` for each processed sheet.
    config: WorkbookConfig = Field(
        ...,
        description="Configuration for processing the workbook, including worksheet"
        + " settings and column transformations.",
    )

    # NOTE(review): the description text speaks of a path, but the annotated
    # type is an openpyxl Workbook object and ``parse`` indexes it by sheet name.
    workbook: Workbook = Field(
        ...,
        description="Path to the Excel workbook file (.xlsx) that will be parsed."
        + " This file contains the data to be transformed and processed.",
    )

    # Sheet names that ``parse`` skips.
    exclude: list = Field(
        default=[
            "__transpiler_protocol",
            "__sheet_meta",
            "__column_meta",
        ],
        description="List of sheet names to exclude from processing."
        + " These are typically meta sheets that contain configuration data or metadata"
        + " rather than actual worksheet data. Default value corresponds to the GHGA"
        + " standard configuration sheets on the GHGA submission workbook.",
    )

    def parse(self) -> GHGAWorkbook:
        """Converts the given workbook into a GHGAWorkbook instance.

        Walks the workbook's sheets in ``sheetnames`` order, skips every name
        listed in ``exclude`` (by default '__transpiler_protocol',
        '__sheet_meta', '__column_meta') and parses each remaining sheet with a
        GHGAWorksheetParser built from that sheet's entry in
        ``config.worksheets``.
        """
        parsed_sheets = []
        for name in self.workbook.sheetnames:
            if name in self.exclude:
                continue
            sheet_parser = GHGAWorksheetParser(config=self.config.worksheets[name])
            parsed_sheets.append(sheet_parser.parse(self.workbook[name]))
        return GHGAWorkbook.model_validate({"workbook": tuple(parsed_sheets)})
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ghga_transpiler
|
|
3
|
-
Version: 2.3.1
|
|
3
|
+
Version: 3.0.0rc1
|
|
4
4
|
Summary: GHGA-Transpiler - excel to JSON converter
|
|
5
5
|
Author-email: "German Human Genome Phenome Archive (GHGA)" <contact@ghga.de>
|
|
6
6
|
License: Apache 2.0
|
|
7
7
|
Project-URL: Repository, https://github.com/ghga-de/ghga-transpiler
|
|
8
|
-
Classifier: Development Status ::
|
|
8
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
9
9
|
Classifier: Operating System :: POSIX :: Linux
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -13,16 +13,15 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
13
13
|
Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
|
|
14
14
|
Classifier: Topic :: Software Development :: Libraries
|
|
15
15
|
Classifier: Intended Audience :: Developers
|
|
16
|
-
Requires-Python: >=3.
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: typer>=0.12
|
|
20
19
|
Requires-Dist: openpyxl==3.*,>=3.1.2
|
|
21
20
|
Requires-Dist: defusedxml==0.*,>=0.7
|
|
22
|
-
Requires-Dist: pydantic<3,>=2
|
|
21
|
+
Requires-Dist: pydantic<3,>=2
|
|
23
22
|
Requires-Dist: PyYAML~=6.0
|
|
24
23
|
Requires-Dist: semver==3.*
|
|
25
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: schemapack==2.0.0
|
|
26
25
|
Dynamic: license-file
|
|
27
26
|
|
|
28
27
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
ghga_transpiler/__init__.py,sha256=b_JlBjNvngOiIolsfKKbwonyfOtS7b5_LJKLuB5LyNQ,908
|
|
2
|
+
ghga_transpiler/__main__.py,sha256=k82ZsGuAOTrbqB4rCNkGOS-AWU8GICtD1_owfiey-Sk,847
|
|
3
|
+
ghga_transpiler/cli.py,sha256=D7MdNSIaKdYWLRMxdvzgsyN_t-brrzLqKrX84qHytjc,2988
|
|
4
|
+
ghga_transpiler/config.py,sha256=R1J8oN3fkv3vlhqj2HuvviExJbL_FooMvDVkS0XUJlA,4411
|
|
5
|
+
ghga_transpiler/exceptions.py,sha256=yHK5m9nb7uPyEmQKv_FBYjpuveuNnc3rzWEXKxxLlYw,1444
|
|
6
|
+
ghga_transpiler/metasheet_parser.py,sha256=Zv9fOBNf7FuCG5RtOnOZNnS2hdrnDVUXaDSROSBC6c4,4883
|
|
7
|
+
ghga_transpiler/models.py,sha256=0HQOwWmYiFmj6gRlf1wXivmwvdjaTD2O1YlwKfi8rC8,3721
|
|
8
|
+
ghga_transpiler/transformations.py,sha256=j5buL8V9aOwJc5VlOS7fOrzgCG7FOvn1JxQvkPr9hLE,2235
|
|
9
|
+
ghga_transpiler/transpile.py,sha256=BVSIh_W6qo92iczea9TGniBhxIyUBbYrqiduyfY44IY,1975
|
|
10
|
+
ghga_transpiler/transpiler_io.py,sha256=ZDnNN5LhCf6ltDr14r_dqeZsbExum4-NAr6g1_liYqo,1809
|
|
11
|
+
ghga_transpiler/workbook_parser.py,sha256=PUySMzLBJxOQTFV5L1XngSLQr5zhKCl2GMbCMv8kIXM,6740
|
|
12
|
+
ghga_transpiler-3.0.0rc1.dist-info/licenses/LICENSE,sha256=nKgIWA3zoWy18_YSW6e9Gd6_p7d7ccwjNZrfJzvKALE,11452
|
|
13
|
+
ghga_transpiler-3.0.0rc1.dist-info/METADATA,sha256=WGeGf-KelIslFxO0U761SC2eYe_bY-WxdNSHl9ggSUY,4441
|
|
14
|
+
ghga_transpiler-3.0.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
ghga_transpiler-3.0.0rc1.dist-info/entry_points.txt,sha256=Fr_VQJynZZl95NXjrQW2gE0bCocgjKVNEZQpaLls8po,65
|
|
16
|
+
ghga_transpiler-3.0.0rc1.dist-info/top_level.txt,sha256=TksRpDO3Y4mvK-B09UhQU3ceZpG19fljNlVDGTfjg8o,16
|
|
17
|
+
ghga_transpiler-3.0.0rc1.dist-info/RECORD,,
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
# Copyright 2021 - 2025 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
|
|
2
|
-
# for the German Human Genome-Phenome Archive (GHGA)
|
|
3
|
-
|
|
4
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
# you may not use this file except in compliance with the License.
|
|
6
|
-
# You may obtain a copy of the License at
|
|
7
|
-
|
|
8
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
|
|
10
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
# See the License for the specific language governing permissions and
|
|
14
|
-
# limitations under the License.
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
|
|
18
|
-
"""Module to load workbook configurations and convert it to transpiler config"""
|
|
19
|
-
|
|
20
|
-
from .config import Config, load_config # noqa
|
ghga_transpiler/config/config.py
DELETED
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
# Copyright 2021 - 2025 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
|
|
2
|
-
# for the German Human Genome-Phenome Archive (GHGA)
|
|
3
|
-
|
|
4
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
# you may not use this file except in compliance with the License.
|
|
6
|
-
# You may obtain a copy of the License at
|
|
7
|
-
|
|
8
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
|
|
10
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
# See the License for the specific language governing permissions and
|
|
14
|
-
# limitations under the License.
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
|
|
18
|
-
"""Module to process config file"""
|
|
19
|
-
|
|
20
|
-
from __future__ import annotations
|
|
21
|
-
|
|
22
|
-
from collections import Counter
|
|
23
|
-
from collections.abc import Callable
|
|
24
|
-
from importlib import resources
|
|
25
|
-
|
|
26
|
-
import yaml
|
|
27
|
-
from pydantic import BaseModel, model_validator
|
|
28
|
-
|
|
29
|
-
from .exceptions import DuplicatedName, UnknownVersionError
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class DefaultSettings(BaseModel):
    """A data model for the defaults of the per-worksheet settings of a transpiler config"""

    # Fallback values: Config.get_param copies each of these onto a
    # worksheet's settings whenever that worksheet leaves the field as None.
    header_row: int = 0
    start_row: int = 0
    start_column: int = 0
    end_column: int = 0
    # Maps a name to a callable; presumably the name is a column name and the
    # callable converts that column's cell values - confirm against the caller.
    transformations: dict[str, Callable] = {}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
class WorksheetSettings(BaseModel):
    """A data model for the per-worksheet settings of a transpiler config"""

    # The only required field; Config.check_name rejects a config in which two
    # worksheets share the same name.
    name: str
    # None means "not set for this worksheet": Config.get_param replaces a None
    # in any of the fields below with the value from DefaultSettings.
    header_row: int | None = None
    start_row: int | None = None
    start_column: int | None = None
    end_column: int | None = None
    transformations: dict[str, Callable] | None = None
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
class Worksheet(BaseModel):
    """A data model for worksheets in the transpiler config"""

    # Config.check_name rejects a config in which two worksheets share the
    # same sheet_name.
    sheet_name: str
    # NOTE(review): None is allowed by the annotation, but the validators on
    # Config read attributes of ``settings`` without checking for None.
    settings: WorksheetSettings | None
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class Config(BaseModel):
    """A data model for the transpiler config"""

    ghga_metadata_version: str
    default_settings: DefaultSettings
    worksheets: list[Worksheet]

    @model_validator(mode="after")
    def get_param(cls, values):  # noqa
        """Fill unset worksheet settings from the global default settings.

        For every worksheet, each setting that is still None is overwritten
        with the value of the same-named field of ``default_settings``.
        """
        defaults = values.default_settings
        for worksheet in values.worksheets:
            settings = worksheet.settings
            for field_name in defaults.__dict__:
                if getattr(settings, field_name) is not None:
                    # Explicit per-worksheet value wins over the default.
                    continue
                setattr(settings, field_name, getattr(defaults, field_name))
        return values

    @model_validator(mode="after")
    def check_name(cls, values):  # noqa
        """Ensure that ``settings.name`` and ``sheet_name`` are unique across worksheets.

        Raises DuplicatedName listing the repeated names; target attribute
        names are checked before worksheet names.
        """
        # Target attribute names (settings.name) must not repeat.
        target_counts = Counter(sheet.settings.name for sheet in values.worksheets)
        repeated_targets = [
            name for name, occurrences in target_counts.items() if occurrences > 1
        ]
        if repeated_targets:
            raise DuplicatedName(
                "Duplicate target attribute names: " + ", ".join(repeated_targets)
            )

        # Worksheet names (sheet_name) must not repeat either.
        sheet_counts = Counter(sheet.sheet_name for sheet in values.worksheets)
        repeated_sheets = [
            name for name, occurrences in sheet_counts.items() if occurrences > 1
        ]
        if repeated_sheets:
            raise DuplicatedName(
                "Duplicate worksheet names: " + ", ".join(repeated_sheets)
            )
        return values
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def load_config(version: str, package: resources.Package) -> Config:
    """Read ``<version>.yaml`` from the given package and build a Config from it.

    Raises UnknownVersionError when the package holds no file for ``version``.
    """
    resource = resources.files(package).joinpath(f"{version}.yaml")
    try:
        raw_yaml = resource.read_text(encoding="utf8")
    except FileNotFoundError:
        # pylint: disable=raise-missing-from
        raise UnknownVersionError(f"Unknown metadata version: {version}") from None
    # NOTE(review): yaml.Loader is PyYAML's full loader and can construct
    # arbitrary Python objects. The packaged configs rely on
    # ``!!python/object/apply`` tags, so this is only safe for trusted,
    # package-shipped files.
    parsed = yaml.load(raw_yaml, yaml.Loader)  # noqa # nosec
    return Config.model_validate(parsed)
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
ghga_metadata_version: 0.10.0
|
|
2
|
-
default_settings:
|
|
3
|
-
header_row: 1
|
|
4
|
-
start_row: 7
|
|
5
|
-
start_column: 1
|
|
6
|
-
transformations:
|
|
7
|
-
attributes: !!python/object/apply:ghga_transpiler.transformations.to_attributes []
|
|
8
|
-
format: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
9
|
-
forward_or_reverse: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
10
|
-
worksheets:
|
|
11
|
-
- settings:
|
|
12
|
-
end_column: 6
|
|
13
|
-
name: analyses
|
|
14
|
-
sheet_name: Analysis
|
|
15
|
-
- settings:
|
|
16
|
-
end_column: 8
|
|
17
|
-
name: analysis_process_output_files
|
|
18
|
-
sheet_name: AnalysisProcessOutputFile
|
|
19
|
-
- settings:
|
|
20
|
-
end_column: 5
|
|
21
|
-
name: analysis_processes
|
|
22
|
-
transformations:
|
|
23
|
-
study_input_files: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
24
|
-
sample_input_files: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
25
|
-
sequencing_process_input_files: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
26
|
-
sheet_name: AnalysisProcess
|
|
27
|
-
- settings:
|
|
28
|
-
end_column: 9
|
|
29
|
-
name: biospecimens
|
|
30
|
-
transformations:
|
|
31
|
-
vital_status_at_sampling: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
32
|
-
age_at_sampling: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
33
|
-
sheet_name: Biospecimen
|
|
34
|
-
- settings:
|
|
35
|
-
end_column: 9
|
|
36
|
-
name: conditions
|
|
37
|
-
transformations:
|
|
38
|
-
disease_or_healthy: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
39
|
-
case_control_status: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
40
|
-
mutant_or_wildtype: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
41
|
-
sheet_name: Condition
|
|
42
|
-
- settings:
|
|
43
|
-
end_column: 3
|
|
44
|
-
name: data_access_committees
|
|
45
|
-
sheet_name: DataAccessCommittee
|
|
46
|
-
- settings:
|
|
47
|
-
end_column: 8
|
|
48
|
-
name: data_access_policies
|
|
49
|
-
transformations:
|
|
50
|
-
data_use_modifiers: !!python/object/apply:ghga_transpiler.transformations.to_snake_case_list []
|
|
51
|
-
data_use_permission: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
52
|
-
sheet_name: DataAccessPolicy
|
|
53
|
-
- settings:
|
|
54
|
-
end_column: 5
|
|
55
|
-
name: datasets
|
|
56
|
-
transformations:
|
|
57
|
-
types: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
58
|
-
sheet_name: Dataset
|
|
59
|
-
- settings:
|
|
60
|
-
end_column: 8
|
|
61
|
-
name: individuals
|
|
62
|
-
transformations:
|
|
63
|
-
ancestries: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
64
|
-
phenotypic_features: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
65
|
-
karyotype: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
66
|
-
sex: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
67
|
-
sheet_name: Individual
|
|
68
|
-
- settings:
|
|
69
|
-
end_column: 14
|
|
70
|
-
name: library_preparation_protocols
|
|
71
|
-
transformations:
|
|
72
|
-
target_regions: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
73
|
-
attributes: !!python/object/apply:ghga_transpiler.transformations.to_attributes []
|
|
74
|
-
library_layout: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
75
|
-
library_type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
76
|
-
library_selection: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
77
|
-
library_preparation_kit_retail_name: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
78
|
-
rnaseq_strandedness: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
79
|
-
primer: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
80
|
-
end_bias: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
81
|
-
sheet_name: LibraryPreparationProtocol
|
|
82
|
-
- settings:
|
|
83
|
-
end_column: 9
|
|
84
|
-
name: publications
|
|
85
|
-
transformations:
|
|
86
|
-
xref: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
87
|
-
sheet_name: Publication
|
|
88
|
-
- settings:
|
|
89
|
-
end_column: 8
|
|
90
|
-
name: sample_files
|
|
91
|
-
sheet_name: SampleFile
|
|
92
|
-
- settings:
|
|
93
|
-
end_column: 10
|
|
94
|
-
name: samples
|
|
95
|
-
transformations:
|
|
96
|
-
xref: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
97
|
-
type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
98
|
-
sheet_name: Sample
|
|
99
|
-
- settings:
|
|
100
|
-
end_column: 7
|
|
101
|
-
name: sequencing_experiments
|
|
102
|
-
sheet_name: SequencingExperiment
|
|
103
|
-
- settings:
|
|
104
|
-
end_column: 8
|
|
105
|
-
name: sequencing_process_files
|
|
106
|
-
sheet_name: SequencingProcessFile
|
|
107
|
-
- settings:
|
|
108
|
-
end_column: 10
|
|
109
|
-
name: sequencing_processes
|
|
110
|
-
sheet_name: SequencingProcess
|
|
111
|
-
- settings:
|
|
112
|
-
end_column: 20
|
|
113
|
-
name: sequencing_protocols
|
|
114
|
-
transformations:
|
|
115
|
-
instrument_model: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
116
|
-
flow_cell_type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
117
|
-
umi_barcode_read: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
118
|
-
sample_barcode_read: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
119
|
-
sheet_name: SequencingProtocol
|
|
120
|
-
- settings:
|
|
121
|
-
end_column: 6
|
|
122
|
-
name: studies
|
|
123
|
-
transformations:
|
|
124
|
-
affiliations: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
125
|
-
attributes: !!python/object/apply:ghga_transpiler.transformations.to_attributes []
|
|
126
|
-
type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
127
|
-
sheet_name: Study
|
|
128
|
-
- settings:
|
|
129
|
-
end_column: 8
|
|
130
|
-
name: study_files
|
|
131
|
-
sheet_name: StudyFile
|
|
132
|
-
- settings:
|
|
133
|
-
end_column: 4
|
|
134
|
-
name: trios
|
|
135
|
-
sheet_name: Trio
|
ghga_transpiler/configs/1.0.yaml
DELETED
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
ghga_metadata_version: 1.0.0
|
|
2
|
-
default_settings:
|
|
3
|
-
header_row: 1
|
|
4
|
-
start_row: 7
|
|
5
|
-
start_column: 1
|
|
6
|
-
transformations:
|
|
7
|
-
attributes: !!python/object/apply:ghga_transpiler.transformations.to_attributes []
|
|
8
|
-
format: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
9
|
-
forward_or_reverse: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
10
|
-
worksheets:
|
|
11
|
-
- settings:
|
|
12
|
-
end_column: 6
|
|
13
|
-
name: analyses
|
|
14
|
-
sheet_name: Analysis
|
|
15
|
-
- settings:
|
|
16
|
-
end_column: 9
|
|
17
|
-
name: analysis_process_output_files
|
|
18
|
-
sheet_name: AnalysisProcessOutputFile
|
|
19
|
-
- settings:
|
|
20
|
-
end_column: 5
|
|
21
|
-
name: analysis_processes
|
|
22
|
-
transformations:
|
|
23
|
-
study_input_files: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
24
|
-
sample_input_files: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
25
|
-
sequencing_process_input_files: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
26
|
-
sheet_name: AnalysisProcess
|
|
27
|
-
- settings:
|
|
28
|
-
end_column: 10
|
|
29
|
-
name: biospecimens
|
|
30
|
-
transformations:
|
|
31
|
-
vital_status_at_sampling: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
32
|
-
age_at_sampling: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
33
|
-
sheet_name: Biospecimen
|
|
34
|
-
- settings:
|
|
35
|
-
end_column: 9
|
|
36
|
-
name: conditions
|
|
37
|
-
transformations:
|
|
38
|
-
disease_or_healthy: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
39
|
-
case_control_status: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
40
|
-
mutant_or_wildtype: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
41
|
-
sheet_name: Condition
|
|
42
|
-
- settings:
|
|
43
|
-
end_column: 3
|
|
44
|
-
name: data_access_committees
|
|
45
|
-
sheet_name: DataAccessCommittee
|
|
46
|
-
- settings:
|
|
47
|
-
end_column: 8
|
|
48
|
-
name: data_access_policies
|
|
49
|
-
transformations:
|
|
50
|
-
data_use_modifiers: !!python/object/apply:ghga_transpiler.transformations.to_snake_case_list []
|
|
51
|
-
data_use_permission: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
52
|
-
sheet_name: DataAccessPolicy
|
|
53
|
-
- settings:
|
|
54
|
-
end_column: 5
|
|
55
|
-
name: datasets
|
|
56
|
-
transformations:
|
|
57
|
-
types: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
58
|
-
sheet_name: Dataset
|
|
59
|
-
- settings:
|
|
60
|
-
end_column: 6
|
|
61
|
-
name: individuals
|
|
62
|
-
transformations:
|
|
63
|
-
ancestries: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
64
|
-
phenotypic_features: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
65
|
-
karyotype: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
66
|
-
sex: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
67
|
-
sheet_name: Individual
|
|
68
|
-
- settings:
|
|
69
|
-
end_column: 14
|
|
70
|
-
name: library_preparation_protocols
|
|
71
|
-
transformations:
|
|
72
|
-
target_regions: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
73
|
-
attributes: !!python/object/apply:ghga_transpiler.transformations.to_attributes []
|
|
74
|
-
library_layout: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
75
|
-
library_type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
76
|
-
library_selection: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
77
|
-
library_preparation_kit_retail_name: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
78
|
-
rnaseq_strandedness: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
79
|
-
primer: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
80
|
-
end_bias: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
81
|
-
sheet_name: LibraryPreparationProtocol
|
|
82
|
-
- settings:
|
|
83
|
-
end_column: 9
|
|
84
|
-
name: publications
|
|
85
|
-
transformations:
|
|
86
|
-
xref: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
87
|
-
sheet_name: Publication
|
|
88
|
-
- settings:
|
|
89
|
-
end_column: 9
|
|
90
|
-
name: sample_files
|
|
91
|
-
sheet_name: SampleFile
|
|
92
|
-
- settings:
|
|
93
|
-
end_column: 10
|
|
94
|
-
name: samples
|
|
95
|
-
transformations:
|
|
96
|
-
xref: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
97
|
-
type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
98
|
-
sheet_name: Sample
|
|
99
|
-
- settings:
|
|
100
|
-
end_column: 7
|
|
101
|
-
name: sequencing_experiments
|
|
102
|
-
sheet_name: SequencingExperiment
|
|
103
|
-
- settings:
|
|
104
|
-
end_column: 9
|
|
105
|
-
name: sequencing_process_files
|
|
106
|
-
sheet_name: SequencingProcessFile
|
|
107
|
-
- settings:
|
|
108
|
-
end_column: 12
|
|
109
|
-
name: sequencing_processes
|
|
110
|
-
sheet_name: SequencingProcess
|
|
111
|
-
- settings:
|
|
112
|
-
end_column: 17
|
|
113
|
-
name: sequencing_protocols
|
|
114
|
-
transformations:
|
|
115
|
-
instrument_model: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
116
|
-
flow_cell_type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
117
|
-
umi_barcode_read: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
118
|
-
sample_barcode_read: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
119
|
-
sheet_name: SequencingProtocol
|
|
120
|
-
- settings:
|
|
121
|
-
end_column: 6
|
|
122
|
-
name: studies
|
|
123
|
-
transformations:
|
|
124
|
-
affiliations: !!python/object/apply:ghga_transpiler.transformations.to_list []
|
|
125
|
-
attributes: !!python/object/apply:ghga_transpiler.transformations.to_attributes []
|
|
126
|
-
type: !!python/object/apply:ghga_transpiler.transformations.to_snake_case []
|
|
127
|
-
sheet_name: Study
|
|
128
|
-
- settings:
|
|
129
|
-
end_column: 9
|
|
130
|
-
name: study_files
|
|
131
|
-
sheet_name: StudyFile
|
|
132
|
-
- settings:
|
|
133
|
-
end_column: 4
|
|
134
|
-
name: trios
|
|
135
|
-
sheet_name: Trio
|