pybiolib 1.2.71__tar.gz → 1.2.99.dev1__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/PKG-INFO +1 -1
  2. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/utils/seq_util.py +61 -53
  3. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/pyproject.toml +1 -1
  4. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/LICENSE +0 -0
  5. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/PYPI_README.md +0 -0
  6. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/__init__.py +0 -0
  7. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_data_record/data_record.py +0 -0
  8. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/__init__.py +0 -0
  9. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/data_record/__init__.py +0 -0
  10. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/data_record/data_record.py +0 -0
  11. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/data_record/push_data.py +0 -0
  12. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/data_record/remote_storage_endpoint.py +0 -0
  13. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/file_utils.py +0 -0
  14. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/fuse_mount/__init__.py +0 -0
  15. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/fuse_mount/experiment_fuse_mount.py +0 -0
  16. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/http_client.py +0 -0
  17. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/lfs/__init__.py +0 -0
  18. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/lfs/cache.py +0 -0
  19. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/libs/__init__.py +0 -0
  20. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/libs/fusepy/__init__.py +0 -0
  21. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/push_application.py +0 -0
  22. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/runtime.py +0 -0
  23. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/types/__init__.py +0 -0
  24. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/types/app.py +0 -0
  25. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/types/data_record.py +0 -0
  26. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/types/experiment.py +0 -0
  27. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/types/resource.py +0 -0
  28. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/types/resource_version.py +0 -0
  29. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/types/typing.py +0 -0
  30. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/utils/__init__.py +0 -0
  31. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_internal/utils/multinode.py +0 -0
  32. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/_runtime/runtime.py +0 -0
  33. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/api/__init__.py +0 -0
  34. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/api/client.py +0 -0
  35. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/app/__init__.py +0 -0
  36. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/app/app.py +0 -0
  37. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/app/search_apps.py +0 -0
  38. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/__init__.py +0 -0
  39. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/api_client.py +0 -0
  40. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/app_types.py +0 -0
  41. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/auth.py +0 -0
  42. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/biolib_app_api.py +0 -0
  43. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/biolib_job_api.py +0 -0
  44. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/common_types.py +0 -0
  45. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/job_types.py +0 -0
  46. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/lfs_types.py +0 -0
  47. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_api_client/user_state.py +0 -0
  48. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/__init__.py +0 -0
  49. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/base_bbf_package.py +0 -0
  50. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/file_in_container.py +0 -0
  51. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/module_input.py +0 -0
  52. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/module_output_v2.py +0 -0
  53. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/remote_endpoints.py +0 -0
  54. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/remote_stream_seeker.py +0 -0
  55. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/saved_job.py +0 -0
  56. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/stdout_and_stderr.py +0 -0
  57. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/system_exception.py +0 -0
  58. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/system_status_update.py +0 -0
  59. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_binary_format/utils.py +0 -0
  60. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_docker_client/__init__.py +0 -0
  61. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_download_container.py +0 -0
  62. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_errors.py +0 -0
  63. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/biolib_logging.py +0 -0
  64. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/__init__.py +0 -0
  65. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/auth.py +0 -0
  66. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/data_record.py +0 -0
  67. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/download_container.py +0 -0
  68. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/init.py +0 -0
  69. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/lfs.py +0 -0
  70. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/push.py +0 -0
  71. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/run.py +0 -0
  72. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/runtime.py +0 -0
  73. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/cli/start.py +0 -0
  74. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/.gitignore +0 -0
  75. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/__init__.py +0 -0
  76. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/cloud_utils/__init__.py +0 -0
  77. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/cloud_utils/cloud_utils.py +0 -0
  78. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/__init__.py +0 -0
  79. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/cache_state.py +0 -0
  80. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/cache_types.py +0 -0
  81. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/docker_image_cache.py +0 -0
  82. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/executors/__init__.py +0 -0
  83. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/executors/docker_executor.py +0 -0
  84. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/executors/docker_types.py +0 -0
  85. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/executors/tars/__init__.py +0 -0
  86. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/executors/types.py +0 -0
  87. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +0 -0
  88. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +0 -0
  89. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/job_storage.py +0 -0
  90. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/job_worker.py +0 -0
  91. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/large_file_system.py +0 -0
  92. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/mappings.py +0 -0
  93. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/utilization_reporter_thread.py +0 -0
  94. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/job_worker/utils.py +0 -0
  95. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/remote_host_proxy.py +0 -0
  96. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/socker_listener_thread.py +0 -0
  97. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/socket_sender_thread.py +0 -0
  98. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/utils.py +0 -0
  99. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/webserver/__init__.py +0 -0
  100. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/webserver/gunicorn_flask_application.py +0 -0
  101. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/webserver/webserver.py +0 -0
  102. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/webserver/webserver_types.py +0 -0
  103. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/webserver/webserver_utils.py +0 -0
  104. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/compute_node/webserver/worker_thread.py +0 -0
  105. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/experiments/__init__.py +0 -0
  106. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/experiments/experiment.py +0 -0
  107. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/jobs/__init__.py +0 -0
  108. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/jobs/job.py +0 -0
  109. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/jobs/job_result.py +0 -0
  110. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/jobs/types.py +0 -0
  111. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/runtime/__init__.py +0 -0
  112. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/sdk/__init__.py +0 -0
  113. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/tables.py +0 -0
  114. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/templates/__init__.py +0 -0
  115. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/templates/example_app.py +0 -0
  116. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/typing_utils.py +0 -0
  117. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/user/__init__.py +0 -0
  118. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/user/sign_in.py +0 -0
  119. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/utils/__init__.py +0 -0
  120. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/utils/app_uri.py +0 -0
  121. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/utils/cache_state.py +0 -0
  122. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/utils/multipart_uploader.py +0 -0
  123. {pybiolib-1.2.71 → pybiolib-1.2.99.dev1}/biolib/utils/zip/remote_zip.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.2.71
3
+ Version: 1.2.99.dev1
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -1,7 +1,7 @@
1
1
  import re
2
- from io import BufferedIOBase
2
+ from io import BufferedIOBase, TextIOWrapper
3
3
 
4
- from biolib.typing_utils import Dict, List, Optional, Union
4
+ from biolib.typing_utils import Dict, List, Optional, Union, Iterator
5
5
 
6
6
 
7
7
  class SeqUtilRecord:
@@ -37,67 +37,75 @@ class SeqUtil:
37
37
  allow_any_sequence_characters: bool = False,
38
38
  allow_empty_sequence: bool = True,
39
39
  file_name: Optional[str] = None,
40
- ) -> List[SeqUtilRecord]:
40
+ ) -> Iterator[SeqUtilRecord]:
41
+ def process_and_yield_record(header: str, sequence_lines: list):
42
+ sequence = ''.join(sequence_lines)
43
+ sequence_id = header.split()[0]
44
+ if not allow_any_sequence_characters:
45
+ invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
46
+ if invalid_sequence_characters:
47
+ raise Exception(
48
+ f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
49
+ )
50
+ if not allow_empty_sequence and not sequence:
51
+ raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
52
+ yield SeqUtilRecord(
53
+ sequence=sequence,
54
+ sequence_id=sequence_id,
55
+ description=header[len(sequence_id):].strip()
56
+ )
57
+
58
+ def line_generator_from_buffered_io_base(file_handle: BufferedIOBase) -> Iterator[str]:
59
+ """Generates lines from a BufferedIOBase handle, decoding UTF-8."""
60
+ for line in file_handle:
61
+ yield line.decode('utf-8')
62
+
63
+ def line_generator_from_text_io_wrapper(file_handle: TextIOWrapper) -> Iterator[str]:
64
+ """Generates lines from a TextIOWrapper handle, decoding UTF-8."""
65
+ for line in file_handle:
66
+ yield line
67
+
41
68
  if input_file is None:
42
69
  if file_name:
43
70
  input_file = file_name
44
71
  else:
45
72
  raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
73
+
74
+ file_handle = None
46
75
  if isinstance(input_file, str):
47
- with open(input_file) as file_handle:
48
- data = file_handle.read().strip()
76
+ file_handle = open(input_file, "r")
77
+ line_iterator = line_generator_from_text_io_wrapper(file_handle)
49
78
  elif isinstance(input_file, BufferedIOBase):
50
- data = input_file.read().decode('utf-8')
79
+ line_iterator = line_generator_from_buffered_io_base(input_file)
51
80
  else:
52
81
  raise ValueError('input_file must be a file name (str) or a BufferedIOBase object')
53
- if not data:
54
- return []
55
-
56
- if '>' not in data:
57
- if default_header:
58
- lines_with_header = []
59
- for index, line in enumerate(data.split('\n')):
60
- index_string = str(index + 1) if index > 0 else ''
61
- lines_with_header.append(f'>{default_header}{index_string}\n{line}')
62
-
63
- data = '\n'.join(lines_with_header)
64
- else:
65
- raise Exception(f'No header line found in FASTA file "{file_name}"')
66
-
67
- splitted = []
68
- tmp_data = ''
69
- for line in data.splitlines():
70
- if line.startswith('>'):
71
- if tmp_data:
72
- splitted.append(tmp_data)
73
- tmp_data = line[1:].strip() + '\n'
74
- else:
75
- if line.strip():
76
- tmp_data += line.strip() + '\n'
77
-
78
- if tmp_data:
79
- splitted.append(tmp_data)
80
-
81
- parsed_sequences = []
82
- for sequence_data in splitted:
83
- sequence_data_splitted = sequence_data.strip().split('\n')
84
- header_line = sequence_data_splitted[0].split()
85
- sequence_id = header_line[0]
86
- description = sequence_data_splitted[0][len(sequence_id) :].strip()
87
- sequence = ''.join([seq.strip() for seq in sequence_data_splitted[1:]])
88
-
89
- if not allow_any_sequence_characters:
90
- invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
91
- if len(invalid_sequence_characters) > 0:
92
- raise Exception(
93
- f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
94
- )
95
- if not allow_empty_sequence and len(sequence) == 0:
96
- raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
97
-
98
- parsed_sequences.append(SeqUtilRecord(sequence=sequence, sequence_id=sequence_id, description=description))
99
82
 
100
- return parsed_sequences
83
+ header = None
84
+ sequence_lines: list[str] = []
85
+
86
+ try:
87
+ for line_number, line in enumerate(line_iterator):
88
+ line = line.strip()
89
+ if line.startswith('>'):
90
+ if header is not None:
91
+ yield from process_and_yield_record(header, sequence_lines)
92
+
93
+ header = line[1:].strip()
94
+ sequence_lines = []
95
+ else:
96
+ if header is None:
97
+ if default_header:
98
+ yield from process_and_yield_record(f"{default_header}{line_number}", [line])
99
+ else:
100
+ raise Exception(f'No header line found in FASTA file "{file_name}"')
101
+ else:
102
+ sequence_lines.append(line)
103
+
104
+ if header is not None:
105
+ yield from process_and_yield_record(header, sequence_lines)
106
+ finally:
107
+ if file_handle:
108
+ file_handle.close()
101
109
 
102
110
  @staticmethod
103
111
  def write_records_to_fasta(file_name: str, records: List[SeqUtilRecord]) -> None:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pybiolib"
3
- version = "1.2.71"
3
+ version = "1.2.99.dev1"
4
4
  description = "BioLib Python Client"
5
5
  readme = "PYPI_README.md"
6
6
  license = "MIT"
File without changes
File without changes