arkindex-base-worker 0.3.6rc1__py3-none-any.whl → 0.3.6rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. arkindex_base_worker-0.3.6rc2.dist-info/METADATA +39 -0
  2. arkindex_base_worker-0.3.6rc2.dist-info/RECORD +40 -0
  3. arkindex_worker/__init__.py +0 -1
  4. arkindex_worker/cache.py +19 -25
  5. arkindex_worker/image.py +16 -17
  6. arkindex_worker/models.py +17 -21
  7. arkindex_worker/utils.py +16 -17
  8. arkindex_worker/worker/__init__.py +14 -23
  9. arkindex_worker/worker/base.py +12 -7
  10. arkindex_worker/worker/classification.py +13 -15
  11. arkindex_worker/worker/dataset.py +3 -4
  12. arkindex_worker/worker/element.py +80 -75
  13. arkindex_worker/worker/entity.py +27 -29
  14. arkindex_worker/worker/metadata.py +19 -25
  15. arkindex_worker/worker/task.py +2 -3
  16. arkindex_worker/worker/training.py +21 -22
  17. arkindex_worker/worker/transcription.py +37 -34
  18. arkindex_worker/worker/version.py +1 -2
  19. tests/conftest.py +55 -75
  20. tests/test_base_worker.py +37 -31
  21. tests/test_cache.py +14 -7
  22. tests/test_dataset_worker.py +4 -4
  23. tests/test_element.py +0 -1
  24. tests/test_elements_worker/__init__.py +0 -1
  25. tests/test_elements_worker/test_classifications.py +0 -1
  26. tests/test_elements_worker/test_cli.py +22 -17
  27. tests/test_elements_worker/test_dataset.py +9 -10
  28. tests/test_elements_worker/test_elements.py +58 -63
  29. tests/test_elements_worker/test_entities.py +10 -20
  30. tests/test_elements_worker/test_metadata.py +72 -96
  31. tests/test_elements_worker/test_task.py +9 -10
  32. tests/test_elements_worker/test_training.py +20 -13
  33. tests/test_elements_worker/test_transcriptions.py +6 -10
  34. tests/test_elements_worker/test_worker.py +16 -14
  35. tests/test_image.py +21 -20
  36. tests/test_merge.py +5 -6
  37. tests/test_utils.py +0 -1
  38. arkindex_base_worker-0.3.6rc1.dist-info/METADATA +0 -27
  39. arkindex_base_worker-0.3.6rc1.dist-info/RECORD +0 -42
  40. arkindex_worker/git.py +0 -392
  41. tests/test_git.py +0 -480
  42. {arkindex_base_worker-0.3.6rc1.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/WHEEL +0 -0
  43. {arkindex_base_worker-0.3.6rc1.dist-info → arkindex_base_worker-0.3.6rc2.dist-info}/top_level.txt +0 -0
tests/test_merge.py CHANGED
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  from uuid import UUID
3
2
 
4
3
  import pytest
@@ -18,8 +17,8 @@ from arkindex_worker.cache import (
18
17
 
19
18
 
20
19
  @pytest.mark.parametrize(
21
- "parents, expected_elements, expected_transcriptions",
22
- (
20
+ ("parents", "expected_elements", "expected_transcriptions"),
21
+ [
23
22
  # Nothing happen when no parents are available
24
23
  ([], [], []),
25
24
  # Nothing happen when the parent file does not exist
@@ -73,7 +72,7 @@ from arkindex_worker.cache import (
73
72
  UUID("22222222-2222-2222-2222-222222222222"),
74
73
  ],
75
74
  ),
76
- ),
75
+ ],
77
76
  )
78
77
  def test_merge_databases(
79
78
  mock_databases, tmp_path, parents, expected_elements, expected_transcriptions
@@ -114,7 +113,7 @@ def test_merge_databases(
114
113
  ] == expected_transcriptions
115
114
 
116
115
 
117
- def test_merge_chunk(mock_databases, tmp_path, monkeypatch):
116
+ def test_merge_chunk(mock_databases, tmp_path):
118
117
  """
119
118
  Check the db merge algorithm support two parents
120
119
  and one of them has a chunk
@@ -155,7 +154,7 @@ def test_merge_chunk(mock_databases, tmp_path, monkeypatch):
155
154
 
156
155
 
157
156
  def test_merge_from_worker(
158
- responses, mock_base_worker_with_cache, mock_databases, tmp_path, monkeypatch
157
+ responses, mock_base_worker_with_cache, mock_databases, tmp_path
159
158
  ):
160
159
  """
161
160
  High level merge from the base worker
tests/test_utils.py CHANGED
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  from pathlib import Path
3
2
 
4
3
  from arkindex_worker.utils import close_delete_file, extract_tar_zst_archive
@@ -1,27 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: arkindex-base-worker
3
- Version: 0.3.6rc1
4
- Summary: Base Worker to easily build Arkindex ML workflows
5
- Home-page: https://teklia.com
6
- Author: Teklia
7
- Author-email: contact@teklia.com
8
- Requires-Python: >=3.7
9
- Requires-Dist: arkindex-client ==1.0.14
10
- Requires-Dist: peewee ==3.17.0
11
- Requires-Dist: Pillow ==10.1.0
12
- Requires-Dist: pymdown-extensions ==10.3.1
13
- Requires-Dist: python-gitlab ==4.1.1
14
- Requires-Dist: python-gnupg ==0.5.1
15
- Requires-Dist: sh ==2.0.6
16
- Requires-Dist: shapely ==2.0.2
17
- Requires-Dist: tenacity ==8.2.3
18
- Requires-Dist: zstandard ==0.22.0
19
- Provides-Extra: docs
20
- Requires-Dist: black ==23.11.0 ; extra == 'docs'
21
- Requires-Dist: doc8 ==1.1.1 ; extra == 'docs'
22
- Requires-Dist: mkdocs ==1.5.3 ; extra == 'docs'
23
- Requires-Dist: mkdocs-material ==9.4.8 ; extra == 'docs'
24
- Requires-Dist: mkdocstrings ==0.23.0 ; extra == 'docs'
25
- Requires-Dist: mkdocstrings-python ==1.7.3 ; extra == 'docs'
26
- Requires-Dist: recommonmark ==0.7.1 ; extra == 'docs'
27
-
@@ -1,42 +0,0 @@
1
- arkindex_worker/__init__.py,sha256=_CYBbq_4ZP9DodY4ZSMNzdj-cT3eAwpQHU86nGzVFUw,186
2
- arkindex_worker/cache.py,sha256=JWYgF8UtFT5tm_V_NPbApnWUqNaRaxMZdDyzMi5i65A,11054
3
- arkindex_worker/git.py,sha256=lUr4in7ddOehPx1_oD5xyZ-h8LF-6DuscSPzgZr4EJ4,15338
4
- arkindex_worker/image.py,sha256=SZphvx7Cdc8vzLhBYeaJLKqlphH_jKqzLsX9ss0474U,14183
5
- arkindex_worker/models.py,sha256=wc3Vda8kiKMyVsau8GeoVXDvP5HrgCNn9VYOA-zQosE,9637
6
- arkindex_worker/utils.py,sha256=XyQf-7oIUVg4-ofVyPwPnGOBmk-3pYn4ax93msDQh-I,6920
7
- arkindex_worker/worker/__init__.py,sha256=Y6tr5wBceGHwChw7Vrsis8R1Y1byCfh3hOPB82vj8Mw,19269
8
- arkindex_worker/worker/base.py,sha256=0DvOTcCatELwkhZS4C9jOLPkg8dLUlXuPWeifO7LR5E,19188
9
- arkindex_worker/worker/classification.py,sha256=rnodg2Twxfly9P9qeWRwqqdWERVY4MSPKvHLsvATyRg,10792
10
- arkindex_worker/worker/dataset.py,sha256=m_opGiSVDCbhCrK3vrs9TaZnssHgl_2x56G5h_BHVD8,2784
11
- arkindex_worker/worker/element.py,sha256=-jIxl8Fgv9z71oxZ9vl7dbMvJ8LHrYDTOQ7X6lXLBeE,30814
12
- arkindex_worker/worker/entity.py,sha256=Dz_uFYNp_k20VvjQ6DQ363O3FsoynwBgaK6k63nXr5g,13499
13
- arkindex_worker/worker/metadata.py,sha256=BsAsAGXGLBlUM5mW38ll-exhCwqyrMuxxZx9tOwqAjI,6426
14
- arkindex_worker/worker/task.py,sha256=FsLWh3unW5IZPn5Ze_3R6FuK5UYM6RG-xJAjITXM0Rc,1498
15
- arkindex_worker/worker/training.py,sha256=FJ5UfUDOgNyCi9Fow0LpVy2c_IMUndHka3qwGlI_BME,10204
16
- arkindex_worker/worker/transcription.py,sha256=KPqT_egO3zA4G8ua1oKlsKJPN_lTbF20okgyecDwEXc,18808
17
- arkindex_worker/worker/version.py,sha256=WypMzANWjIqj2_yPuwxGXH-nAfP2yE1gqATqtybr-RA,1460
18
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- tests/conftest.py,sha256=YRYkJmkmyMJfDKPKoItHPpXnMYwOdRNTAjUvnu4M1uU,22348
20
- tests/test_base_worker.py,sha256=hROeLAW79U6LvXwXAugAsQwj50hdzEjJK2_twh5g27s,24783
21
- tests/test_cache.py,sha256=QYtMaMqKppWndrbkxNwJYpVemEYOH7X4e9mB6POVIXw,10437
22
- tests/test_dataset_worker.py,sha256=-zX7NsOiSkyYQhcJj98dF-xj-1oiZBysFtRrljjPM_0,27691
23
- tests/test_element.py,sha256=_Ve7U33b2JHDF0wYJUwdJzV_wW8q-Y-4eMaKDaRQ0W8,13205
24
- tests/test_git.py,sha256=2rX81uka7a1T0qzlqN70epvVuJh9Ia41L6iNRZfxNnY,14944
25
- tests/test_image.py,sha256=5_3EjP-Oe4NH1FFQXLDd7kyxQlRDXaVzpH8OQSPFBbM,15220
26
- tests/test_merge.py,sha256=n7wwlrOs5x_1cHlcDKBMr6KPTtya91HzI9pZrvvHbw8,8375
27
- tests/test_utils.py,sha256=4e8DsljBXq568YDA6ZQZvFmh3uudRTD4ira6o9Sy3WI,1260
28
- tests/test_elements_worker/__init__.py,sha256=dxec3bdwmoXRIK2jw4d1zYrzfvuG_82tE3IyK46ZsYc,203
29
- tests/test_elements_worker/test_classifications.py,sha256=hvRhf2nEr34SZSRpIxNy2EiufnMHcLIdIkWKfckN4YU,32051
30
- tests/test_elements_worker/test_cli.py,sha256=m0595219Y4SCL7UjuoVL0wxeUUjl_B0mq3OU_O_ybZk,2751
31
- tests/test_elements_worker/test_dataset.py,sha256=KaI08MjApZMoaUm0tF321KR6eHCDg0o0arPT_yyyi0Y,12075
32
- tests/test_elements_worker/test_elements.py,sha256=MtiVAVnBFgQ3_6Ia82OhaLngwaxP3cl7jKuJ7MN8r_k,75656
33
- tests/test_elements_worker/test_entities.py,sha256=sdsWPndrioqDlkpnsHZ8YwHZUyGEskvgfcFwOPjb0fI,33866
34
- tests/test_elements_worker/test_metadata.py,sha256=cI-jjB6WOkQGr52bgffwSyPo7Rg4m-HOMTfOOTqMHRQ,17813
35
- tests/test_elements_worker/test_task.py,sha256=abe0fvHqPpOZV5X6PoDYJNKMDpCNxc0m5pyb4s5iG9U,6324
36
- tests/test_elements_worker/test_training.py,sha256=D96h8rCvjyo_KJ7xScT5mdF1i97qaL1Fq6aiXRMeIdg,8292
37
- tests/test_elements_worker/test_transcriptions.py,sha256=DntYx0rCV2Og3gYxOuBu9-skHYruOBYvGUvOVz4FFiU,68746
38
- tests/test_elements_worker/test_worker.py,sha256=IRyB7mcsTyV7UsHVuP-P4ViFYaXFA3QgCh5hh7iuNdk,16364
39
- arkindex_base_worker-0.3.6rc1.dist-info/METADATA,sha256=zVyuHDcUYANdshXURYG7JQllDGDKPdckD1HAGDo-drw,960
40
- arkindex_base_worker-0.3.6rc1.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
41
- arkindex_base_worker-0.3.6rc1.dist-info/top_level.txt,sha256=TtagLI8LSv7GE7nG8MQqDFAJ5bNDPJn7Z5vizOgrWkA,22
42
- arkindex_base_worker-0.3.6rc1.dist-info/RECORD,,
arkindex_worker/git.py DELETED
@@ -1,392 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Helper classes for workers that interact with Git repositories and the GitLab API.
4
- """
5
- import shutil
6
- import time
7
- from datetime import datetime
8
- from pathlib import Path
9
- from typing import Optional, Union
10
-
11
- import gitlab
12
- import requests
13
- import sh
14
- from gitlab.v4.objects import MergeRequest, ProjectMergeRequest
15
-
16
- from arkindex_worker import logger
17
-
18
- NOTHING_TO_COMMIT_MSG = "nothing to commit, working tree clean"
19
- MR_HAS_CONFLICTS_ERROR_CODE = 406
20
-
21
-
22
- class GitlabHelper:
23
- """Helper class to save files to GitLab repository"""
24
-
25
- def __init__(
26
- self,
27
- project_id: str,
28
- gitlab_url: str,
29
- gitlab_token: str,
30
- branch: str,
31
- rebase_wait_period: Optional[int] = 1,
32
- delete_source_branch: Optional[bool] = True,
33
- max_rebase_tries: Optional[int] = 10,
34
- ):
35
- """
36
- :param project_id: the id of the gitlab project
37
- :param gitlab_url: gitlab server url
38
- :param gitlab_token: gitlab private token of user with permission to accept merge requests
39
- :param branch: name of the branch to where the exported branch will be merged
40
- :param rebase_wait_period: seconds to wait between each poll to check whether rebase has finished
41
- :param delete_source_branch: should delete the source branch after merging?
42
- :param max_rebase_tries: max number of tries to rebase when merging before giving up
43
- """
44
- self.project_id = project_id
45
- self.gitlab_url = gitlab_url
46
- self.gitlab_token = str(gitlab_token).strip()
47
- self.branch = branch
48
- self.rebase_wait_period = rebase_wait_period
49
- self.delete_source_branch = delete_source_branch
50
- self.max_rebase_tries = max_rebase_tries
51
-
52
- logger.info("Creating a Gitlab client")
53
- self._api = gitlab.Gitlab(self.gitlab_url, private_token=self.gitlab_token)
54
- self.project = self._api.projects.get(self.project_id)
55
- self.is_rebase_finished = False
56
-
57
- def merge(self, branch_name: str, title: str) -> bool:
58
- """
59
- Create a merge request and try to merge.
60
- Always rebase first to avoid conflicts from MRs made in parallel
61
- :param branch_name: Source branch name
62
- :param title: Title of the merge request
63
- :return: Whether the branch was successfully merged
64
- """
65
- mr = None
66
- # always rebase first, because other workers might have merged already
67
- for i in range(self.max_rebase_tries):
68
- logger.info(f"Trying to merge, try nr: {i}")
69
- try:
70
- if mr is None:
71
- mr = self._create_merge_request(branch_name, title)
72
-
73
- mr.rebase()
74
- rebase_success = self._wait_for_rebase_to_finish(mr.iid)
75
- if not rebase_success:
76
- logger.error("Rebase failed, won't be able to merge!")
77
- return False
78
-
79
- mr.merge(should_remove_source_branch=self.delete_source_branch)
80
- logger.info("Merge successful")
81
- return True
82
- except gitlab.GitlabMRClosedError as e:
83
- if e.response_code == MR_HAS_CONFLICTS_ERROR_CODE:
84
- logger.info("Merge failed, trying to rebase and merge again.")
85
- continue
86
- else:
87
- logger.error(f"Merge was not successful: {e}")
88
- return False
89
- except gitlab.GitlabError as e:
90
- logger.error(f"Gitlab error: {e}")
91
- if 400 <= e.response_code < 500:
92
- # 4XX errors shouldn't be fixed by retrying
93
- raise e
94
- except requests.exceptions.ConnectionError as e:
95
- logger.error(f"Server connection error, will wait and retry: {e}")
96
- time.sleep(self.rebase_wait_period)
97
-
98
- return False
99
-
100
- def _create_merge_request(self, branch_name: str, title: str) -> MergeRequest:
101
- """
102
- Create a MergeRequest towards the branch with the given title
103
-
104
- :param branch_name: Target branch of the merge request
105
- :param title: Title of the merge request
106
- :return: The created merge request
107
- """
108
- logger.info(f"Creating a merge request for {branch_name}")
109
- # retry_transient_error will retry the request on 50X errors
110
- # https://github.com/python-gitlab/python-gitlab/blob/265dbbdd37af88395574564aeb3fd0350288a18c/gitlab/__init__.py#L539
111
- mr = self.project.mergerequests.create(
112
- {
113
- "source_branch": branch_name,
114
- "target_branch": self.branch,
115
- "title": title,
116
- },
117
- )
118
- return mr
119
-
120
- def _get_merge_request(
121
- self, merge_request_id: Union[str, int], include_rebase_in_progress: bool = True
122
- ) -> ProjectMergeRequest:
123
- """
124
- Retrieve a merge request by ID
125
- :param merge_request_id: The ID of the merge request
126
- :param include_rebase_in_progress: Whether the rebase in progress should be included
127
- :return: The related merge request
128
- """
129
- return self.project.mergerequests.get(
130
- merge_request_id, include_rebase_in_progress=include_rebase_in_progress
131
- )
132
-
133
- def _wait_for_rebase_to_finish(self, merge_request_id: Union[str, int]) -> bool:
134
- """
135
- Poll the merge request until it has finished rebasing
136
- :param merge_request_id: The ID of the merge request
137
- :return: Whether the rebase has finished successfully
138
- """
139
-
140
- logger.info("Checking if rebase has finished..")
141
- self.is_rebase_finished = False
142
- while not self.is_rebase_finished:
143
- time.sleep(self.rebase_wait_period)
144
- mr = self._get_merge_request(merge_request_id)
145
- self.is_rebase_finished = not mr.rebase_in_progress
146
- if mr.merge_error is None:
147
- logger.info("Rebase has finished")
148
- return True
149
-
150
- logger.error(f"Rebase failed: {mr.merge_error}")
151
- return False
152
-
153
-
154
- def make_backup(path: str):
155
- """
156
- Create a backup file in the same directory with timestamp as suffix ".bak_{timestamp}"
157
- :param path: Path to the file to be backed up
158
- """
159
- path = Path(path)
160
- if not path.exists():
161
- raise ValueError(f"No file to backup! File not found: {path}")
162
- # timestamp with milliseconds
163
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
164
- backup_path = Path(str(path) + f".bak_{timestamp}")
165
- shutil.copy(path, backup_path)
166
- logger.info(f"Made a backup {backup_path}")
167
-
168
-
169
- def prepare_git_key(
170
- private_key: str,
171
- known_hosts: str,
172
- private_key_path: Optional[str] = "~/.ssh/id_ed25519",
173
- known_hosts_path: Optional[str] = "~/.ssh/known_hosts",
174
- ):
175
- """
176
- Prepare the git keys (put them in to the correct place) so that git could be used.
177
- Fixes some whitespace problems that come from arkindex secrets store (Django admin).
178
-
179
- Also creates a backup of the previous keys if they exist, to avoid losing the
180
- original keys of the developers.
181
-
182
- :param private_key: git private key contents
183
- :param known_hosts: git known_hosts contents
184
- :param private_key_path: path where to put the private key
185
- :param known_hosts_path: path where to put the known_hosts
186
- """
187
- # secrets admin UI seems to strip the trailing whitespace
188
- # but git requires the key file to have a new line at the end
189
- # for some reason uses CRLF line endings, but git doesn't like that
190
- private_key = private_key.replace("\r", "") + "\n"
191
- known_hosts = known_hosts.replace("\r", "") + "\n"
192
-
193
- private_key_path = Path(private_key_path).expanduser()
194
- known_hosts_path = Path(known_hosts_path).expanduser()
195
-
196
- if private_key_path.exists():
197
- if private_key_path.read_text() != private_key:
198
- make_backup(private_key_path)
199
-
200
- if known_hosts_path.exists():
201
- if known_hosts_path.read_text() != known_hosts:
202
- make_backup(known_hosts_path)
203
-
204
- private_key_path.write_text(private_key)
205
- # private key must be private, otherwise git will fail
206
- # expecting octal for permissions
207
- private_key_path.chmod(0o600)
208
- known_hosts_path.write_text(known_hosts)
209
-
210
- logger.info(f"Private key size after: {private_key_path.stat().st_size}")
211
- logger.info(f"Known size after: {known_hosts_path.stat().st_size}")
212
-
213
-
214
- class GitHelper:
215
- """
216
- A helper class for running git commands
217
-
218
- At the beginning of the workflow call [run_clone_in_background][arkindex_worker.git.GitHelper.run_clone_in_background].
219
- When all the files are ready to be added to git then call
220
- [save_files][arkindex_worker.git.GitHelper.save_files] to move the files in to the git repository
221
- and try to push them.
222
-
223
- Examples
224
- --------
225
- in worker.configure() configure the git helper and start the cloning:
226
- ```
227
- gitlab = GitlabHelper(...)
228
- prepare_git_key(...)
229
- self.git_helper = GitHelper(workflow_id=workflow_id, gitlab_helper=gitlab, ...)
230
- self.git_helper.run_clone_in_background()
231
- ```
232
-
233
- at the end of the workflow (at the end of worker.run()) push the files to git:
234
- ```
235
- self.git_helper.save_files(self.out_dir)
236
- ```
237
- """
238
-
239
- def __init__(
240
- self,
241
- repo_url,
242
- git_dir,
243
- export_path,
244
- workflow_id,
245
- gitlab_helper: GitlabHelper,
246
- git_clone_wait_period=1,
247
- ):
248
- """
249
-
250
- :param repo_url: the url of the git repository where the export will be pushed
251
- :param git_dir: the directory where to clone the git repository
252
- :param export_path: the path inside the git repository where to put the exported files
253
- :param workflow_id: the process id to see the workflow graph in the frontend
254
- :param gitlab_helper: helper for gitlab
255
- :param git_clone_wait_period: check if clone has finished every N seconds at the end of the workflow
256
- """
257
- logger.info("Creating git helper")
258
- self.repo_url = repo_url
259
- self.git_dir = Path(git_dir)
260
- self.export_path = self.git_dir / export_path
261
- self.workflow_id = workflow_id
262
- self.gitlab_helper = gitlab_helper
263
- self.git_clone_wait_period = git_clone_wait_period
264
- self.is_clone_finished = False
265
- self.cmd = None
266
- self.success = None
267
- self.exit_code = None
268
-
269
- self.git_dir.mkdir(parents=True, exist_ok=True)
270
- # run git commands outside of the repository (no need to change dir)
271
- self._git = sh.git.bake("-C", self.git_dir)
272
-
273
- def _clone_done(self, cmd, success, exit_code):
274
- """
275
- Method that is called when git clone has finished in the background
276
- """
277
- logger.info("Finishing cloning")
278
- self.cmd = cmd
279
- self.success = success
280
- self.exit_code = exit_code
281
- self.is_clone_finished = True
282
- if not success:
283
- logger.error(f"Clone failed: {cmd} : {success} : {exit_code}")
284
- logger.info("Cloning finished")
285
-
286
- def run_clone_in_background(self):
287
- """
288
- Clones the git repository in the background in to the self.git_dir directory.
289
-
290
- `self.is_clone_finished` can be used to know whether the cloning has finished
291
- or not.
292
- """
293
- logger.info(f"Starting clone {self.repo_url} in background")
294
- cmd = sh.git.clone(
295
- self.repo_url, self.git_dir, _bg=True, _done=self._clone_done
296
- )
297
- logger.info(f"Continuing clone {self.repo_url} in background")
298
- return cmd
299
-
300
- def _wait_for_clone_to_finish(self):
301
- logger.info("Checking if cloning has finished..")
302
- while not self.is_clone_finished:
303
- time.sleep(self.git_clone_wait_period)
304
- logger.info("Cloning has finished")
305
-
306
- if not self.success:
307
- logger.error("Clone was not a success")
308
- logger.error(f"Clone error exit code: {str(self.exit_code)}")
309
- raise ValueError("Clone was not a success")
310
-
311
- def save_files(self, export_out_dir: Path):
312
- """
313
- Move files in export_out_dir to the cloned git repository
314
- and try to merge the created files if possible.
315
- :param export_out_dir: Path to the files to be saved
316
- :raises sh.ErrorReturnCode: _description_
317
- :raises Exception: _description_
318
- """
319
- self._wait_for_clone_to_finish()
320
-
321
- # move exported files to git directory
322
- file_count = self._move_files_to_git(export_out_dir)
323
-
324
- # use timestamp to avoid branch name conflicts with multiple chunks
325
- current_timestamp = datetime.isoformat(datetime.now())
326
- # ":" is not allowed in a branch name
327
- branch_timestamp = current_timestamp.replace(":", ".")
328
- # add files to a new branch
329
- branch_name = f"workflow_{self.workflow_id}_{branch_timestamp}"
330
- self._git.checkout("-b", branch_name)
331
- self._git.add("-A")
332
- try:
333
- self._git.commit(
334
- "-m",
335
- f"Exported files from workflow: {self.workflow_id} at {current_timestamp}",
336
- )
337
- except sh.ErrorReturnCode as e:
338
- if NOTHING_TO_COMMIT_MSG in str(e.stdout):
339
- logger.warning("Nothing to commit (no changes)")
340
- return
341
- else:
342
- logger.error(f"Commit failed:: {e}")
343
- raise e
344
-
345
- # count the number of lines in the output
346
- wc_cmd_out = str(
347
- sh.wc(self._git.show("--stat", "--name-status", "--oneline", "HEAD"), "-l")
348
- )
349
- # -1 because of the git command header
350
- files_committed = int(wc_cmd_out.strip()) - 1
351
- logger.info(f"Committed {files_committed} files")
352
- if file_count != files_committed:
353
- logger.warning(
354
- f"Of {file_count} added files only {files_committed} were committed"
355
- )
356
-
357
- self._git.push("-u", "origin", "HEAD")
358
-
359
- if self.gitlab_helper:
360
- try:
361
- self.gitlab_helper.merge(branch_name, f"Merge {branch_name}")
362
- except Exception as e:
363
- logger.error(f"Merge failed: {e}")
364
- raise e
365
- else:
366
- logger.info(
367
- "No gitlab_helper defined, not trying to merge the pushed branch"
368
- )
369
-
370
- def _move_files_to_git(self, export_out_dir: Path) -> int:
371
- """
372
- Move all files in the export_out_dir to the git repository
373
- while keeping the same directory structure
374
- :param export_out_dir: Path to the files to be moved
375
- :return: Total count of moved files
376
- """
377
- file_count = 0
378
- file_names = [
379
- file_name for file_name in export_out_dir.rglob("*") if file_name.is_file()
380
- ]
381
- for file in file_names:
382
- rel_file_path = file.relative_to(export_out_dir)
383
- out_file = self.export_path / rel_file_path
384
- if not out_file.exists():
385
- out_file.parent.mkdir(parents=True, exist_ok=True)
386
- # rename does not work if the source and destination are not on the same mounts
387
- # it will give an error: "OSError: [Errno 18] Invalid cross-device link:"
388
- shutil.copy(file, out_file)
389
- file.unlink()
390
- file_count += 1
391
- logger.info(f"Moved {file_count} files")
392
- return file_count