ghga-transpiler 1.2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,562 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- # Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
4
- # for the German Human Genome-Phenome Archive (GHGA)
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
-
18
- # pylint: skip-file
19
-
20
- """This script checks that the license and license headers
21
- exists and that they are up to date.
22
- """
23
-
24
- import argparse
25
- import re
26
- import sys
27
- from datetime import date
28
- from pathlib import Path
29
- from typing import List, Optional, Tuple, Union
30
-
31
# Root directory of the package (the parent of the directory holding this script):
ROOT_DIR = Path(__file__).parent.parent.resolve()

# File containing the default global copyright notice:
GLOBAL_COPYRIGHT_FILE_PATH = ROOT_DIR / ".devcontainer" / "license_header.txt"

# Files and dirs (relative to the target dir) excluded from the license header check:
EXCLUDE = [
    ".devcontainer",
    "eggs",
    ".eggs",
    "dist",
    "build",
    "develop-eggs",
    "lib",
    "lib64",  # was misspelled "lib62", which never matched the real directory
    "parts",
    "sdist",
    "wheels",
    "pip-wheel-metadata",
    ".git",
    ".github",
    ".flake8",
    ".gitignore",
    ".pylintrc",
    "example_config.yaml",
    "config_schema.json",
    "LICENSE",  # is checked but not for the license header
    ".pre-commit-config.yaml",
    "docs",
    ".vscode",
    ".mypy_cache",
    ".mypy.ini",
    ".pytest_cache",
    ".editorconfig",
    ".static_files",
    ".static_files_ignore",
    ".mandatory_files",
    ".mandatory_files_ignore",
    ".deprecated_files",
    ".deprecated_files_ignore",
]

# Files whose path ends with one of these strings are excluded from the check:
EXCLUDE_ENDINGS = [
    "html",
    "ini",
    "jinja",
    "json",
    "md",
    "pub",
    "pyc",
    "sec",
    "txt",
    "xml",
    "yaml",
    "yml",
]

# Files whose path matches (from its start) any of these regexes are excluded:
EXCLUDE_PATTERN = [r".*\.egg-info.*", r".*__cache__.*", r".*\.git.*"]

# The license header template. "{author}" is replaced by the author text and
# "{year}" is matched against the year (or year range) found in the checked header:
COPYRIGHT_TEMPLATE = """Copyright {year} {author}

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License."""

# A list of all chars that may be used to introduce a comment:
COMMENT_CHARS = ["#"]

AUTHOR = """Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
for the German Human Genome-Phenome Archive (GHGA)"""

# The copyright notice should not date earlier than this year:
MIN_YEAR = 2021

# The path to the License file relative to target dir
LICENSE_FILE = "LICENSE"
119
-
120
-
121
class GlobalCopyrightNotice:
    """
    Write-once holder for the copyright notice shared by all checked files.

    Assign the notice to `text`; a second assignment raises a RuntimeError.
    `n_lines` is derived from `text` at assignment time and reports how many
    lines the notice spans. Reading `n_lines` before `text` was assigned raises
    a ValueError.
    """

    def __init__(self):
        self._text: Optional[str] = None
        self._n_lines: Optional[int] = None

    @property
    def text(self) -> Optional[str]:
        return self._text

    @text.setter
    def text(self, new_text: str):
        already_set = self._text is not None
        if already_set:
            raise RuntimeError("You can only set the value once.")

        self._text = new_text
        # a text with k newline characters consists of k + 1 lines:
        self._n_lines = new_text.count("\n") + 1

    @property
    def n_lines(self) -> int:
        line_count = self._n_lines
        if line_count is not None:
            return line_count

        raise ValueError(
            "This property is not yet available. Please set the `text` property first."
        )
154
-
155
-
156
class UnexpectedBinaryFileError(RuntimeError):
    """Raised when a file that was expected to be text turns out to be binary."""

    def __init__(self, file_path: Union[str, Path]):
        super().__init__(
            f"The file could not be read because it is binary: {file_path}"
        )
162
-
163
-
164
def get_target_files(
    target_dir: Path,
    exclude: List[str] = EXCLUDE,
    exclude_endings: List[str] = EXCLUDE_ENDINGS,
    exclude_pattern: List[str] = EXCLUDE_PATTERN,
) -> List[Path]:
    """Get all files below the target dir that do not match an exclude condition.

    Args:
        target_dir (pathlib.Path): The target dir to search (recursively).
        exclude (List[str], optional):
            Overwrite default list of file/dir paths relative to
            the target dir that shall be excluded.
        exclude_endings (List[str], optional):
            Overwrite default list of file endings that shall
            be excluded. This is a plain string-suffix test on the full
            path; no "." is implied in front of the ending.
        exclude_pattern (List[str], optional):
            Overwrite default list of regex patterns. A file is excluded
            if any pattern matches its absolute path from the start
            (`re.match` semantics).

    Returns:
        The absolute paths of all regular files that are not excluded.
    """
    abs_target_dir = Path(target_dir).absolute()

    # Everything below is invariant per call, so compute it once instead of
    # once per file:
    excluded_paths = [(abs_target_dir / excl).absolute() for excl in exclude]
    excluded_endings = tuple(exclude_endings)
    excluded_regexes = [re.compile(pattern) for pattern in exclude_pattern]

    def is_excluded(file_: Path) -> bool:
        """Check a single absolute file path against all exclude conditions."""
        file_str = str(file_)
        return (
            any(file_.is_relative_to(excl) for excl in excluded_paths)
            or file_str.endswith(excluded_endings)
            or any(regex.match(file_str) for regex in excluded_regexes)
        )

    all_files = (
        path.absolute() for path in abs_target_dir.rglob("*") if path.is_file()
    )
    return [file_ for file_ in all_files if not is_excluded(file_)]
201
-
202
-
203
def normalized_line(line: str, chars_to_trim: List[str] = COMMENT_CHARS) -> str:
    """Strip surrounding whitespace and the given characters from both ends of a line.

    Each entry of `chars_to_trim` is stripped in turn (using `str.strip`), and
    any whitespace that remains at either end afterwards is removed as well.
    """
    trimmed = line.strip()

    for trim_chars in chars_to_trim:
        trimmed = trimmed.strip(trim_chars)

    # a bare strip() already covers newlines and tabs:
    return trimmed.strip()
210
-
211
-
212
def normalized_text(text: str, chars_to_trim: List[str] = COMMENT_CHARS) -> str:
    """Normalize a license header text.

    The text is processed line by line: shebang lines (starting with "#!") are
    dropped, every other line is normalized with `normalized_line`, and lines
    that are empty after normalization are dropped.

    Args:
        text (str): The raw (possibly commented) text.
        chars_to_trim (List[str], optional):
            Characters to strip from both ends of every line. They are
            forwarded to `normalized_line`.

    Returns:
        The remaining normalized lines joined by newlines.
    """
    norm_lines: List[str] = []

    for line in text.split("\n"):
        stripped_line = line.strip()

        # exclude shebang:
        if stripped_line.startswith("#!"):
            continue

        # Forward `chars_to_trim`; it used to be silently ignored here, so that
        # custom comment characters were never stripped:
        norm_line = normalized_line(stripped_line, chars_to_trim=chars_to_trim)

        # exclude empty lines:
        if norm_line:
            norm_lines.append(norm_line)

    return "\n".join(norm_lines).strip("\n")
233
-
234
-
235
def format_copyright_template(copyright_template: str, author: str) -> str:
    """Insert the author into the header template and normalize the result.

    Every occurrence of "{author}" in the template is replaced by `author`
    before the text is passed through `normalized_text`.
    """
    with_author = copyright_template.replace("{author}", author)
    return normalized_text(with_author)
240
-
241
-
242
def is_commented_line(line: str, comment_chars: List[str] = COMMENT_CHARS) -> bool:
    """Tell whether the line, ignoring surrounding whitespace, starts with a comment char."""
    content = line.strip()
    return any(content.startswith(marker) for marker in comment_chars)
250
-
251
-
252
def is_empty_line(line: str) -> bool:
    """Tell whether a line consists of nothing but whitespace (or nothing at all)."""
    # a bare strip() removes newlines and tabs along with all other whitespace
    return not line.strip()
255
-
256
-
257
def get_header(file_path: Path, comment_chars: List[str] = COMMENT_CHARS) -> str:
    """Extracts the header from a file and normalizes it.

    The header consists of all leading lines that are either comments or empty;
    reading stops at the first line that is neither.

    Args:
        file_path (pathlib.Path): The file to read the header from.
        comment_chars (List[str], optional):
            Characters that introduce a comment line.

    Returns:
        The header text normalized via `normalized_text`.

    Raises:
        UnexpectedBinaryFileError: If the file cannot be decoded as UTF-8 text.
    """
    header_lines: List[str] = []

    try:
        # Decode explicitly as UTF-8: the headers contain non-ASCII characters
        # and the binary-file detection below relies on decoding errors, so
        # neither should depend on the locale's default encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            for line in file:
                belongs_to_header = is_commented_line(
                    line, comment_chars=comment_chars
                ) or is_empty_line(line)
                if not belongs_to_header:
                    break
                header_lines.append(line)
    except UnicodeDecodeError as error:
        raise UnexpectedBinaryFileError(file_path=file_path) from error

    # normalize the lines:
    header = "".join(header_lines)
    return normalized_text(header, chars_to_trim=comment_chars)
276
-
277
-
278
def validate_year_string(year_string: str, min_year: int = MIN_YEAR) -> bool:
    """Check if the specified year string is valid.

    A valid year string is either the current year as a single number, or a
    range of the form "<first> - <last>" where `first` is not earlier than
    `min_year`, `last` is later than `first`, and `last` is the current year.

    Args:
        year_string (str): The year or year range taken from a copyright notice.
        min_year (int, optional): The earliest year a range may start with.

    Returns:
        `True` if valid or `False` otherwise.
    """
    current_year = date.today().year

    # If the year_string is a single number, it must be the current year.
    # (`isdecimal` only accepts characters that `int` can actually parse.)
    if year_string.isdecimal():
        return int(year_string) == current_year

    # Otherwise, a range (e.g. 2021 - 2023) is expected. The whole string has
    # to match so that trailing garbage is rejected:
    match = re.fullmatch(r"(\d+) - (\d+)", year_string)

    if not match:
        return False

    year_1 = int(match.group(1))
    year_2 = int(match.group(2))

    # Check the validity of the range: it must not start before `min_year`
    # and must span at least two different, ascending years:
    if year_1 < min_year or year_2 <= year_1:
        return False

    # year_2 must be equal to the current year:
    return year_2 == current_year
303
-
304
-
305
def check_copyright_notice(
    copyright: str,
    global_copyright: GlobalCopyrightNotice,
    copyright_template: str = COPYRIGHT_TEMPLATE,
    author: str = AUTHOR,
    comment_chars: List[str] = COMMENT_CHARS,
    min_year: int = MIN_YEAR,
) -> bool:
    """Checks the specified copyright text against a template.

    Args:
        copyright (str):
            The (normalized) copyright text to check against the template.
        global_copyright (GlobalCopyrightNotice):
            If its text is already set, the beginning of `copyright` must be
            identical to that text and the template is not consulted.
            If its text is not yet set and `copyright` passes the template
            check, the text is set to the copyright notice found in
            `copyright`.
        copyright_template (str, optional):
            A string containing a template for the expected license header.
            You may include "{year}", which matches the year (or year range)
            of the notice; that year string is validated separately.
            This defaults to the Apache 2.0 Copyright notice.
        author (str, optional):
            The author that shall be included in the license header.
            It will replace any appearance of "{author}" in the license
            header. This defaults to an author info for GHGA.
        comment_chars (List[str], optional):
            Not used by this function.
        min_year (int, optional):
            The earliest year a year range may start with.

    Returns:
        `True` if the copyright notice is valid, `False` otherwise.
    """
    copyright_lines = copyright.split("\n")

    # If the global_copyright is already set, check if the current copyright is
    # identical to it:
    if global_copyright.text is not None:
        copyright_cleaned = "\n".join(copyright_lines[: global_copyright.n_lines])
        return global_copyright.text == copyright_cleaned

    formatted_template = format_copyright_template(copyright_template, author=author)
    template_lines = formatted_template.split("\n")

    # The header should be at least as long as the template:
    if len(copyright_lines) < len(template_lines):
        return False

    for template_line, header_line in zip(template_lines, copyright_lines):
        if "{year}" in template_line:
            # Treat everything except the "{year}" placeholder as literal text
            # (the template is not a regex) and require the whole line to match:
            pattern = r"(.+?)".join(
                re.escape(literal) for literal in template_line.split("{year}")
            )
            match = re.fullmatch(pattern, header_line)

            if not match:
                return False

            year_string = match.group(1)
            if not validate_year_string(year_string, min_year=min_year):
                return False

        elif template_line != header_line:
            return False

    # Take this copyright as the global_copyright from now on. The notice spans
    # as many lines as the template has (not as many as its last line has
    # characters):
    global_copyright.text = "\n".join(copyright_lines[: len(template_lines)])

    return True
368
-
369
-
370
def check_file_headers(
    target_dir: Path,
    global_copyright: GlobalCopyrightNotice,
    copyright_template: str = COPYRIGHT_TEMPLATE,
    author: str = AUTHOR,
    exclude: List[str] = EXCLUDE,
    exclude_endings: List[str] = EXCLUDE_ENDINGS,
    exclude_pattern: List[str] = EXCLUDE_PATTERN,
    comment_chars: List[str] = COMMENT_CHARS,
    min_year: int = MIN_YEAR,
) -> Tuple[List[Path], List[Path]]:
    """Verify the license header of every non-excluded file below the target dir.

    Args:
        target_dir (pathlib.Path): The target dir to search.
        global_copyright (GlobalCopyrightNotice):
            The shared copyright notice; it is handed to
            `check_copyright_notice` for every file.
        copyright_template (str, optional):
            Template for the expected license header, handed to
            `check_copyright_notice`.
            This defaults to the Apache 2.0 Copyright notice.
        author (str, optional):
            The author that shall be included in the license header.
            This defaults to an author info for GHGA.
        exclude (List[str], optional):
            File/dir paths relative to the target dir that shall be excluded.
        exclude_endings (List[str], optional):
            File endings that shall be excluded.
        exclude_pattern (List[str], optional):
            Regex patterns; matching file paths are excluded.
        comment_chars (List[str], optional):
            Characters that introduce a comment line.
        min_year (int, optional):
            The earliest year a copyright year range may start with.

    Returns:
        A tuple `(passed_files, failed_files)`. Files that turn out to be
        binary appear in neither list.
    """
    passed_files: List[Path] = []
    failed_files: List[Path] = []

    candidates = get_target_files(
        target_dir,
        exclude=exclude,
        exclude_endings=exclude_endings,
        exclude_pattern=exclude_pattern,
    )

    for candidate in candidates:
        try:
            header = get_header(candidate, comment_chars=comment_chars)
            header_ok = check_copyright_notice(
                copyright=header,
                global_copyright=global_copyright,
                copyright_template=copyright_template,
                author=author,
                comment_chars=comment_chars,
                min_year=min_year,
            )
        except UnexpectedBinaryFileError:
            # Binary files cannot carry a license header and are skipped.
            continue

        bucket = passed_files if header_ok else failed_files
        bucket.append(candidate)

    return (passed_files, failed_files)
439
-
440
-
441
def check_license_file(
    license_file: Path,
    global_copyright: GlobalCopyrightNotice,
    copyright_template: str = COPYRIGHT_TEMPLATE,
    author: str = AUTHOR,
    comment_chars: List[str] = COMMENT_CHARS,
    min_year: int = MIN_YEAR,
) -> bool:
    """Currently only checks if the copyright notice in the
    License file is up to date.

    Args:
        license_file (pathlib.Path): Path to the license file to check.
        global_copyright (GlobalCopyrightNotice):
            The shared copyright notice; it is handed to
            `check_copyright_notice` together with the notice extracted
            from the license file.
        copyright_template (str, optional):
            A string of the copyright notice (usually same as license header).
            This defaults to the Apache 2.0 Copyright notice.
        author (str, optional):
            The author that shall be included in the copyright notice.
            It will replace any appearance of "{author}" in the copyright
            notice. This defaults to an author info for GHGA.
        comment_chars (List[str], optional):
            Passed through to `check_copyright_notice`.
        min_year (int, optional):
            The earliest year a copyright year range may start with.

    Returns:
        `True` if the license file exists and its copyright notice is valid,
        `False` otherwise.
    """
    if not license_file.is_file():
        print(f'Could not find license file "{str(license_file)}".')
        return False

    # Decode explicitly as UTF-8 so that the non-ASCII author names are read
    # correctly regardless of the locale's default encoding:
    with open(license_file, "r", encoding="utf-8") as file_:
        license_text = normalized_text(file_.read())

    # Extract the copyright notice:
    # (is expected to be at the end of the file):
    formatted_template = format_copyright_template(copyright_template, author=author)
    template_lines = formatted_template.split("\n")
    license_lines = license_text.split("\n")
    copyright = "\n".join(license_lines[-len(template_lines) :])

    return check_copyright_notice(
        copyright=copyright,
        global_copyright=global_copyright,
        copyright_template=copyright_template,
        author=author,
        comment_chars=comment_chars,
        min_year=min_year,
    )
491
-
492
-
493
def run():
    """Run checks from CLI.

    Parses the command line arguments, loads the global copyright notice,
    optionally checks the LICENSE file, checks the license headers of all
    target files, and prints a report. Exits the process with status 1 if any
    check failed and with status 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        prog="license-checker",
        description=(
            "This script checks that the license and license headers "
            + "exists and that they are up to date."
        ),
    )

    parser.add_argument(
        "-L",
        "--no-license-file-check",
        help="Disables the check of the license file",
        action="store_true",
    )

    parser.add_argument(
        "-t",
        "--target-dir",
        help="Specify a custom target dir. Overwrites the default package root.",
    )

    args = parser.parse_args()

    target_dir = Path(args.target_dir).absolute() if args.target_dir else ROOT_DIR

    print(f'Working in "{target_dir}"\n')

    global_copyright = GlobalCopyrightNotice()

    # Get global copyright from .devcontainer/license_header.txt file. Decode
    # explicitly as UTF-8 so that the non-ASCII author names do not depend on
    # the locale's default encoding:
    with open(
        GLOBAL_COPYRIGHT_FILE_PATH, "r", encoding="utf-8"
    ) as global_copyright_file:
        global_copyright.text = normalized_text(global_copyright_file.read())

    if args.no_license_file_check:
        license_file_valid = True
    else:
        license_file = target_dir / LICENSE_FILE
        print(f'Checking if LICENSE file is up to date: "{license_file}"')
        license_file_valid = check_license_file(
            license_file, global_copyright=global_copyright
        )
        print(
            "Copyright notice in license file is "
            + ("" if license_file_valid else "not ")
            + "up to date.\n"
        )

    print("Checking license headers in files:")
    passed_files, failed_files = check_file_headers(
        target_dir, global_copyright=global_copyright
    )
    print(f"{len(passed_files)} files passed.")
    print(f"{len(failed_files)} files failed" + (":" if failed_files else "."))
    for failed_file in failed_files:
        print(f' - "{failed_file.relative_to(target_dir)}"')

    print("")

    if failed_files or not license_file_valid:
        print("Some checks failed.")
        sys.exit(1)

    print("All checks passed.")
    sys.exit(0)


if __name__ == "__main__":
    run()
@@ -1,17 +0,0 @@
1
- # Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
2
- # for the German Human Genome-Phenome Archive (GHGA)
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- #
16
-
17
- """A collection of utilities used by scripts."""
@@ -1,36 +0,0 @@
1
- # Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
2
- # for the German Human Genome-Phenome Archive (GHGA)
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- #
16
-
17
- """A collection of CLI utilities"""
18
-
19
- import typer
20
-
21
-
22
def echo_success(message: str):
    """Echo the message to the terminal, styled in green."""
    typer.echo(typer.style(text=message, fg=typer.colors.GREEN))
27
-
28
-
29
def echo_failure(message: str):
    """Echo the message to the terminal, styled in red."""
    typer.echo(typer.style(text=message, fg=typer.colors.RED))
34
-
35
-
36
# Re-export typer's `run` under this module's namespace:
run = typer.run
scripts/update_all.py DELETED
@@ -1,51 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- # Copyright 2021 - 2023 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
4
- # for the German Human Genome-Phenome Archive (GHGA)
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
-
19
- """Run all update scripts that are present in the repository in the correct order"""
20
-
21
# Every step below follows the same pattern: try to import the `main` function
# of an update script; if the import fails (ImportError), the step is skipped
# silently, otherwise a progress message is printed and the update is run.
# The steps run in the order in which they appear here.

# Step 1: template files
try:
    from scripts.update_template_files import main as update_template
except ImportError:
    pass
else:
    print("Pulling in updates from template repository")
    update_template()

# Step 2: config docs
try:
    from scripts.update_config_docs import main as update_config
except ImportError:
    pass
else:
    print("Updating config docs")
    update_config()

# Step 3: OpenAPI docs
try:
    from scripts.update_openapi_docs import main as update_openapi
except ImportError:
    pass
else:
    print("Updating OpenAPI docs")
    update_openapi()

# Step 4: README
try:
    from scripts.update_readme import main as update_readme
except ImportError:
    pass
else:
    print("Updating README")
    update_readme()