pybioos 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pybioos might be problematic. Click here for more details.
- bioos/__about__.py +4 -0
- bioos/__init__.py +1 -0
- bioos/bioos.py +90 -0
- bioos/bioos_workflow.py +284 -0
- bioos/config.py +147 -0
- bioos/errors.py +89 -0
- bioos/internal/__init__.py +1 -0
- bioos/internal/tos.py +306 -0
- bioos/log.py +125 -0
- bioos/models/__init__.py +1 -0
- bioos/models/models.py +13 -0
- bioos/resource/__init__.py +1 -0
- bioos/resource/data_models.py +157 -0
- bioos/resource/files.py +229 -0
- bioos/resource/utility.py +45 -0
- bioos/resource/workflows.py +590 -0
- bioos/resource/workspaces.py +123 -0
- bioos/service/BioOsService.py +191 -0
- bioos/service/__init__.py +1 -0
- bioos/service/api.py +291 -0
- bioos/service/config.py +37 -0
- bioos/tests/__init__.py +0 -0
- bioos/tests/base.py +21 -0
- bioos/tests/bioos.py +43 -0
- bioos/tests/data_models.py +259 -0
- bioos/tests/files.py +174 -0
- bioos/tests/utils.py +68 -0
- bioos/tests/workflows.py +287 -0
- bioos/tests/workspaces.py +115 -0
- bioos/utils/__init__.py +0 -0
- bioos/utils/common_tools.py +57 -0
- bioos/utils/workflows.py +2 -0
- pybioos-0.0.3.dist-info/LICENSE +21 -0
- pybioos-0.0.3.dist-info/METADATA +24 -0
- pybioos-0.0.3.dist-info/RECORD +38 -0
- pybioos-0.0.3.dist-info/WHEEL +5 -0
- pybioos-0.0.3.dist-info/entry_points.txt +2 -0
- pybioos-0.0.3.dist-info/top_level.txt +1 -0
bioos/tests/bioos.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from unittest import mock
|
|
2
|
+
from unittest.mock import patch
|
|
3
|
+
|
|
4
|
+
from pandas import DataFrame
|
|
5
|
+
|
|
6
|
+
from bioos import bioos
|
|
7
|
+
from bioos.config import Config
|
|
8
|
+
from bioos.tests.base import BaseInit
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestBioOs(BaseInit):
    """Tests for the top-level ``bioos`` facade: login status and workspace listing."""

    def test_login_status(self):
        """Verify ``bioos.status()`` reflects both successful and failed pings."""
        # Status is captured with the real (BaseInit-provided) credentials
        # before _ping_func is patched.
        status = bioos.status()
        with patch.object(Config, "_ping_func") as success_ping:
            self.assertIsInstance(status, Config.LoginInfo)
            self.assertEqual(status.access_key, self.ak)
            self.assertEqual(status.secret_key, self.sk)
            self.assertEqual(status.endpoint, self.endpoint)
            self.assertEqual(status.login_status, "Already logged in")
            success_ping.assert_called_once()
        with patch.object(Config, "_ping_func", side_effect=Exception(b'foo')) as fail_ping:
            # Re-login with fake credentials while the ping always raises:
            # the reported status must flip to "Not logged in".
            fake_ak = "aaa"
            fake_sk = "bbb"
            fake_endpoint = "http://fake.endpoint.com"
            fake_region = "region"
            bioos.login(fake_endpoint, fake_ak, fake_sk, fake_region)
            status = bioos.status()
            self.assertIsInstance(status, Config.LoginInfo)
            self.assertEqual(status.access_key, fake_ak)
            self.assertEqual(status.secret_key, fake_sk)
            self.assertEqual(status.endpoint, fake_endpoint)
            self.assertEqual(status.region, fake_region)
            self.assertEqual(status.login_status, "Not logged in")
            # Two ping attempts are expected here — presumably one from
            # login() and one from status(); TODO confirm against Config.
            fail_ping.assert_has_calls([mock.call(), mock.call()])

    def test_list_workspaces(self):
        """``bioos.list_workspaces()`` returns a DataFrame built from one service call."""
        with patch.object(Config.service(), "list_workspaces",
                          return_value={"Items": [{}]}) as success_list:
            workspaces = bioos.list_workspaces()
            self.assertIsInstance(workspaces, DataFrame)
            success_list.assert_called_once()
            # NOTE(review): PageSize 0 presumably means "no paging / fetch
            # all" — confirm against the BioOs service API.
            success_list.assert_called_with({"PageSize": 0})
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
from unittest import mock
|
|
3
|
+
from unittest.mock import patch
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pandas.testing
|
|
7
|
+
from pandas import DataFrame
|
|
8
|
+
|
|
9
|
+
from bioos import bioos
|
|
10
|
+
from bioos.errors import ConflictError, NotFoundError
|
|
11
|
+
from bioos.service.BioOsService import BioOsService
|
|
12
|
+
from bioos.tests.base import BaseInit
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestDataModel(BaseInit):
    """Tests for the workspace data-model resource using fully mocked service calls.

    All fixtures below mirror the wire format of BioOsService responses:
    ``list_data_models`` payloads, ``list_data_model_rows`` payloads, and the
    DataFrames the client is expected to build from them.
    """

    # Canned list_data_models response: 9 models spanning the three types
    # (normal / set / workspace) the client must filter on.
    list_data_models_val = {'TotalCount': 9, 'Items': [
        {'ID': 'dcblq1tteig44bng68od0', 'Name': 'jxc', 'RowCount': 162, 'Type': 'normal'},
        {'ID': 'dccc0ne5eig41ascop420', 'Name': 'run', 'RowCount': 499, 'Type': 'normal'},
        {'ID': 'dccc0nkleig41ascop42g', 'Name': 'sample', 'RowCount': 3, 'Type': 'normal'},
        {'ID': 'dccc0nmteig41ascop430', 'Name': 'sample6', 'RowCount': 1922, 'Type': 'normal'},
        {'ID': 'dccdaq2deig42s7rgs7j0', 'Name': 'sample_set', 'RowCount': 1, 'Type': 'set'},
        {'ID': 'dccc0o0teig41ascop43g', 'Name': 'test', 'RowCount': 3000, 'Type': 'normal'},
        {'ID': 'dccdaq6leig42s7rgs7jg', 'Name': 'test_set', 'RowCount': 1, 'Type': 'set'},
        {'ID': 'dccc0o2teig41ascop440', 'Name': 'testaa', 'RowCount': 3000, 'Type': 'normal'},
        {'ID': 'dcc6tbkleig4c9lddjt2g', 'Name': 'workspace_data', 'RowCount': 257788, 'Type': 'workspace'}]}
    # Canned list_data_model_rows response for the "sample" model (3 rows,
    # 7 columns, including non-ASCII and quote-containing cell values).
    list_data_model_rows_val = \
        {
            'TotalCount': 3,
            'Headers':
                ['sample_id', 'column-1-file-CRAM', 'date',
                 '123456789s_123456789s_123456789s_123456789s_', 'gg', 'hh', 'jj'],
            'Rows': [
                [
                    'your-sample-1-id', 'OK', '01/01/2022', 'we啊', 'abc',
                    's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/b6d2bf67-760c-4c56-b5b0-30a2e08dc180/call-step1/execution/resp.txt',
                    'abc'
                ],
                [
                    'your-sample-2-id', 'OK', '01/01/2022', "s'd", 'abc',
                    's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/cccd6f44-a53b-4f77-8b0a-b72768aba55a/call-step1/execution/resp.txt',
                    'abc'
                ],
                [
                    'your-sample-3-id', 'OK', '01/01/2022', "s'd", 'abc',
                    's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/c86ce783-6dd7-47e1-88ba-e282e5b0d2c4/call-step1/execution/resp.txt',
                    'abc'
                ]
            ]
        }
    # Canned list_data_model_row_ids response used by the delete flow.
    list_data_model_rows_id = {
        "RowIDs": [
            "your-sample-1-id",
            "your-sample-2-id",
            "your-sample-changed"
        ]
    }

    # DataFrame equivalent of list_data_model_rows_val — what read() should yield.
    sample_data = pd.DataFrame(
        {'sample_id': {0: 'your-sample-1-id', 1: 'your-sample-2-id', 2: 'your-sample-3-id'},
         'column-1-file-CRAM': {0: 'OK', 1: 'OK', 2: 'OK'},
         'date': {0: '01/01/2022', 1: '01/01/2022', 2: '01/01/2022'},
         '123456789s_123456789s_123456789s_123456789s_': {0: 'we啊', 1: "s'd", 2: "s'd"},
         'gg': {0: 'abc', 1: 'abc', 2: 'abc'}, 'hh': {
            0: 's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/b6d2bf67-760c-4c56-b5b0-30a2e08dc180/call-step1/execution/resp.txt',
            1: 's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/cccd6f44-a53b-4f77-8b0a-b72768aba55a/call-step1/execution/resp.txt',
            2: 's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/c86ce783-6dd7-47e1-88ba-e282e5b0d2c4/call-step1/execution/resp.txt'},
         'jj': {0: 'abc', 1: 'abc', 2: 'abc'}})
    # Same frame except row 2's sample_id differs — the payload used for write().
    sample_data_to_write = pd.DataFrame(
        {'sample_id': {0: 'your-sample-1-id', 1: 'your-sample-2-id', 2: 'your-sample-changed'},
         'column-1-file-CRAM': {0: 'OK', 1: 'OK', 2: 'OK'},
         'date': {0: '01/01/2022', 1: '01/01/2022', 2: '01/01/2022'},
         '123456789s_123456789s_123456789s_123456789s_': {0: 'we啊', 1: "s'd", 2: "s'd"},
         'gg': {0: 'abc', 1: 'abc', 2: 'abc'}, 'hh': {
            0: 's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/b6d2bf67-760c-4c56-b5b0-30a2e08dc180/call-step1/execution/resp.txt',
            1: 's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/cccd6f44-a53b-4f77-8b0a-b72768aba55a/call-step1/execution/resp.txt',
            2: 's3://bioos-transfertest-testfake_workspace/analysis/scc9oqc5eig42lnksf7qg/test/c86ce783-6dd7-47e1-88ba-e282e5b0d2c4/call-step1/execution/resp.txt'},
         'jj': {0: 'abc', 1: 'abc', 2: 'abc'}})

    def __init__(self, *args, **kwargs):
        super(TestDataModel, self).__init__(*args, **kwargs)
        # Resource under test: the data_models handle of the test workspace.
        self.data_models = bioos.workspace(self.workspace_id).data_models

    def test_repr(self):
        """Attribute access is lazy; repr() and list() each hit the service once."""
        with patch.object(BioOsService, "list_data_models",
                          return_value=self.list_data_models_val) as success_list:
            # makes no call
            self.data_models
            # call list_data_models once
            repr(self.data_models)
            # makes another list_data_models call
            # list() exposes only the 'normal'-type models (sets and
            # workspace_data are filtered out).
            pandas.testing.assert_frame_equal(self.data_models.list(), DataFrame.from_records([
                {"ID": "dcblq1tteig44bng68od0", "Name": "jxc",
                 "RowCount": 162, "Type": "normal"},
                {"ID": "dccc0ne5eig41ascop420", "Name": "run",
                 "RowCount": 499, "Type": "normal"},
                {"ID": "dccc0nkleig41ascop42g", "Name": "sample",
                 "RowCount": 3, "Type": "normal"},
                {"ID": "dccc0nmteig41ascop430", "Name": "sample6",
                 "RowCount": 1922, "Type": "normal"},
                {"ID": "dccc0o0teig41ascop43g", "Name": "test",
                 "RowCount": 3000, "Type": "normal"},
                {"ID": "dccc0o2teig41ascop440", "Name": "testaa",
                 "RowCount": 3000, "Type": "normal"},
            ]))
            success_list.assert_has_calls([
                mock.call({
                    'WorkspaceID': self.workspace_id
                }), mock.call({
                    'WorkspaceID': self.workspace_id
                })
            ])

    def test_read_all(self):
        """read() with no target fetches rows for every 'normal' model."""
        with patch.object(BioOsService, "list_data_models",
                          return_value=self.list_data_models_val) as success_list:
            with patch.object(BioOsService, "list_data_model_rows",
                              return_value={}) as success_rows_list:
                self.data_models.read()
                success_list.assert_called_once_with({
                    'WorkspaceID': self.workspace_id,
                })
                # One rows request per 'normal' model, PageSize 0 for each.
                calls = []
                for model in self.list_data_models_val.get("Items"):
                    if model.get("Type") == "normal":
                        calls.append(mock.call({
                            'WorkspaceID': self.workspace_id,
                            'ID': model["ID"],
                            'PageSize': 0,
                        }))
                success_rows_list.assert_has_calls(
                    calls
                )

    def test_crud(self):
        """Full read / conflicting write / forced write / read-back / delete cycle."""
        new_sample = "new_sample"
        new_sample_id = "acbq1tteig44bng68oa0"
        # Writing both an existing model ("sample") and a brand-new one.
        data_to_write = {"sample": self.sample_data_to_write,
                         new_sample: self.sample_data_to_write}
        with patch.object(BioOsService, "delete_data_model_rows_and_headers") as success_delete:
            with patch.object(BioOsService, "create_data_model",
                              return_value={
                                  "ID": new_sample_id
                              }) as success_create:

                with patch.object(BioOsService, "list_data_models",
                                  return_value=self.list_data_models_val) as success_list:
                    with patch.object(BioOsService, "list_data_model_rows",
                                      return_value=self.list_data_model_rows_val) as success_rows_list:
                        # Plain read of the existing "sample" model.
                        read_res = self.data_models.read("sample")
                        self.assertEqual(len(read_res), 1)
                        self.assertIsInstance(read_res, dict)
                        pandas.testing.assert_frame_equal(read_res["sample"], self.sample_data)

                        # Writing over an existing model without force must
                        # raise and must not create anything.
                        try:
                            self.data_models.write(data_to_write, force=False)
                        except ConflictError as e:
                            self.assertEqual(e.message,
                                             "parameter 'sources' conflicts: {'sample'} "
                                             "already exists, pls use force=True to overwrite")
                        success_create.assert_not_called()

                        # force=True creates/overwrites both models.
                        self.data_models.write(data_to_write, force=True)
                        success_create.assert_has_calls([
                            mock.call({
                                'WorkspaceID': self.workspace_id,
                                'Name': "sample",
                                'Headers': list(self.sample_data_to_write.head()),
                                'Rows': self.sample_data_to_write.values.tolist(),
                            }),
                            mock.call({
                                'WorkspaceID': self.workspace_id,
                                'Name': new_sample,
                                'Headers': list(self.sample_data_to_write.head()),
                                'Rows': self.sample_data_to_write.values.tolist(),
                            })
                        ])

                        success_list.assert_has_calls([
                            mock.call({
                                'WorkspaceID': self.workspace_id,
                            }),
                            mock.call({
                                'WorkspaceID': self.workspace_id,
                            }),
                        ])
                        success_rows_list.assert_called_once_with({
                            'WorkspaceID': self.workspace_id,
                            'ID': "dccc0nkleig41ascop42g",
                            'PageSize': 0,
                        })

                # Simulate the service state after the write: one extra model
                # and the changed third row.
                new_list_data_models_val = copy.deepcopy(self.list_data_models_val)
                new_list_data_models_val["TotalCount"] = 10
                new_list_data_models_val['Items'].append(
                    {'ID': new_sample_id, 'Name': new_sample, 'RowCount': 3, 'Type': 'normal'})

                new_list_data_model_rows_val = copy.deepcopy(self.list_data_model_rows_val)
                new_list_data_model_rows_val['Rows'][2][0] = "your-sample-changed"
                with patch.object(BioOsService, "list_data_models",
                                  return_value=new_list_data_models_val) as success_list:
                    with patch.object(BioOsService, "list_data_model_rows",
                                      return_value=new_list_data_model_rows_val) as success_rows_list:
                        with patch.object(BioOsService, "list_data_model_row_ids",
                                          return_value=self.list_data_model_rows_id) as success_rows_id_list:
                            # Read back the overwritten "sample" model.
                            sample_res = self.data_models.read("sample")
                            self.assertEqual(len(sample_res), 1)
                            self.assertIsInstance(sample_res, dict)
                            pandas.testing.assert_frame_equal(sample_res["sample"],
                                                              self.sample_data_to_write)

                            # The new model must read back identically.
                            new_sample_res = self.data_models.read(new_sample)
                            self.assertEqual(len(new_sample_res), 1)
                            self.assertIsInstance(new_sample_res, dict)
                            pandas.testing.assert_frame_equal(sample_res["sample"],
                                                              new_sample_res[new_sample])

                            self.data_models.delete(new_sample)

                            success_list.assert_has_calls([
                                mock.call({
                                    'WorkspaceID': self.workspace_id,
                                }),
                                mock.call({
                                    'WorkspaceID': self.workspace_id,
                                }),
                                mock.call({
                                    'WorkspaceID': self.workspace_id,
                                })
                            ])
                            success_rows_list.assert_has_calls([
                                mock.call({
                                    'WorkspaceID': self.workspace_id,
                                    'ID': "dccc0nkleig41ascop42g",
                                    'PageSize': 0,
                                }),
                                mock.call({
                                    'WorkspaceID': self.workspace_id,
                                    'ID': new_sample_id,
                                    'PageSize': 0,
                                }),
                            ])
                            # delete() resolves row IDs first, then removes
                            # all rows and headers of the target model.
                            success_rows_id_list.assert_called_once_with({
                                'WorkspaceID': self.workspace_id,
                                'ID': new_sample_id,
                            })
                            success_delete.assert_called_once_with({
                                'WorkspaceID': self.workspace_id,
                                'ID': new_sample_id,
                                'RowIDs': ['your-sample-1-id', 'your-sample-2-id', 'your-sample-changed']
                            })

        # With the model gone from the listing, deleting again must fail.
        new_list_data_models_val["TotalCount"] = 9
        new_list_data_models_val["Items"] = new_list_data_models_val["Items"][:-1]
        with patch.object(BioOsService, "list_data_models",
                          return_value=new_list_data_models_val) as success_list:
            try:
                self.data_models.delete(new_sample)
            except NotFoundError as e:
                self.assertEqual(e.message, f"target '{new_sample}' not found")
bioos/tests/files.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import hashlib
|
|
3
|
+
import os
|
|
4
|
+
import random
|
|
5
|
+
import shutil
|
|
6
|
+
import time
|
|
7
|
+
import unittest
|
|
8
|
+
|
|
9
|
+
from pandas import DataFrame
|
|
10
|
+
|
|
11
|
+
from bioos.bioos import workspace
|
|
12
|
+
from bioos.tests.base import BaseInit
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestFiles(BaseInit):
    """End-to-end tests for workspace file upload/download/delete against TOS.

    These tests require real credentials (``BaseInit.really_login``); without
    them the CRUD cases are skipped and ``setUp`` creates no fixture files.
    """

    # Unique remote directory per run so concurrent runs do not collide.
    tos_target_dir = f"test_upload_files-{time.time()}"
    local_small_files_dir = "small_files/"
    local_downloads_files_dir = "downloads/"
    # local file path -> md5 hexdigest, populated by setUp.
    small_files = {}
    big_file = {}
    small_num = 5
    big_num = 1

    @classmethod
    def get_file_md5(cls, file_path):
        """Return the hex MD5 digest of the file at *file_path*."""
        with open(file_path, 'rb') as f:
            data = f.read()
        return hashlib.md5(data).hexdigest()

    @classmethod
    def generate_random_str(cls, length=16):
        """Return a random lowercase ASCII string of the given *length*."""
        # BUGFIX: the alphabet previously read 'abcdefghigklmnopqrstuvwxyz'
        # (duplicate 'g', missing 'j'); use the full lowercase alphabet.
        base_str = 'abcdefghijklmnopqrstuvwxyz'
        return ''.join(random.choice(base_str) for _ in range(length))

    @classmethod
    def generate_random_file(cls, size, cur_dir="./"):
        """Create a file of *size* random bytes under *cur_dir*; return its path.

        Data is written in chunks of at most 5 MiB to bound memory usage.
        """
        # makedirs(exist_ok=True) avoids the exists()/mkdir() race and also
        # tolerates nested directory paths.
        os.makedirs(cur_dir, exist_ok=True)
        tmp_file = os.path.join(cur_dir, cls.generate_random_str(8))
        with open(tmp_file, 'wb') as f:
            while size > 0:
                sz = int(min(size, 5 * 1024 * 1024))
                f.write(os.urandom(sz))
                size -= sz
        return tmp_file

    def setUp(self):
        """Bind the files resource and, when really logged in, build fixtures."""
        self.files = workspace(self.workspace_id).files
        if not self.really_login:
            return
        # create 5 small files and 1 big file
        for _ in range(self.small_num):
            small_file = os.path.normpath(
                self.generate_random_file(5 * 1024 * 1024, self.local_small_files_dir))
            self.small_files[small_file] = self.get_file_md5(small_file)
        big_file = os.path.normpath(self.generate_random_file(200 * 1024 * 1024))
        self.big_file[big_file] = self.get_file_md5(big_file)

    def tearDown(self):
        """Remove local fixtures and reset the class-level registries."""
        for f in self.big_file:
            os.remove(f)
        if os.path.exists(self.local_small_files_dir):
            shutil.rmtree(self.local_small_files_dir)
        if os.path.exists(self.local_downloads_files_dir):
            shutil.rmtree(self.local_downloads_files_dir)
        # setUp fills the *class* dicts via self; reset them explicitly so
        # state never leaks between test methods.
        TestFiles.small_files = {}
        TestFiles.big_file = {}

    @unittest.skipUnless(BaseInit.really_login, "need real ak,sk,endpoint,"
                                                "workspace_id")
    def test_1_crud_case(self):
        """Upload/download with flatten=True: directory structure is discarded."""
        files = copy.deepcopy(self.small_files)
        files.update(self.big_file)
        self.files.upload(files, self.tos_target_dir, flatten=True)

        list_df = self.files.list()
        self.assertIsInstance(list_df, DataFrame)
        uploaded_files_in_tos = \
            list(list_df.query(f"key.str.startswith('{self.tos_target_dir}')")["key"])
        self.assertEqual(len(uploaded_files_in_tos), self.small_num + self.big_num)

        for f in uploaded_files_in_tos:
            # local_dir will not be mapped to tos
            self.assertNotIn(f"{self.tos_target_dir}/{self.local_small_files_dir}", f)

        self.files.download([os.path.join(self.tos_target_dir, os.path.basename(f)) for f in files],
                            self.local_downloads_files_dir,
                            flatten=True)
        # All downloads land in one flat directory; compare content by MD5.
        md5_set = set()
        for root, dirs, files in os.walk(self.local_downloads_files_dir):
            self.assertEqual(len(files), self.big_num + self.small_num)
            self.assertEqual(len(dirs), 0)
            for name in files:
                md5_set.add(self.get_file_md5(os.path.join(root, name)))
        self.assertSetEqual(md5_set,
                            set(self.big_file.values()).union(set(self.small_files.values())))

        # clean tos files
        for f in uploaded_files_in_tos:
            self.files.delete(f)

        list_df = self.files.list()
        files_in_tos_after_delete = list_df.query(f"key.str.startswith('{self.tos_target_dir}')")[
            "key"]
        self.assertEqual(len(files_in_tos_after_delete), 0)

        # clean downloads files
        shutil.rmtree(self.local_downloads_files_dir)

    @unittest.skipUnless(BaseInit.really_login, "need real ak,sk,endpoint,"
                                                "workspace_id")
    def test_2_crud_case(self):
        """Upload/download with flatten=False: local paths are mirrored in TOS."""
        files = copy.deepcopy(self.small_files)
        files.update(self.big_file)
        self.files.upload(files, self.tos_target_dir, flatten=False)

        list_df = self.files.list()
        self.assertIsInstance(list_df, DataFrame)
        uploaded_files_in_tos = \
            list(list_df.query(f"key.str.startswith('{self.tos_target_dir}')")["key"])
        self.assertEqual(len(uploaded_files_in_tos), self.big_num + self.small_num)

        for f in uploaded_files_in_tos:
            # make sure that all small files are in the directory
            if os.path.basename(f) not in files:
                self.assertIn(f"{self.tos_target_dir}/{self.local_small_files_dir}", f)
            # make sure that the only one big file matches
            else:
                self.assertEqual(f"{self.tos_target_dir}/{tuple(self.big_file.keys())[0]}", f)

        self.files.download([os.path.join(self.tos_target_dir, f) for f in files],
                            self.local_downloads_files_dir,
                            flatten=False)
        md5_set = set()
        for root, dirs, files in os.walk(self.local_downloads_files_dir):
            # dir for big_file
            if os.path.samefile(root,
                                os.path.join(self.local_downloads_files_dir, self.tos_target_dir)):
                self.assertEqual(len(files), self.big_num)
            # dir for small_files
            elif os.path.samefile(root, os.path.join(self.local_downloads_files_dir,
                                                     os.path.join(self.tos_target_dir,
                                                                  self.local_small_files_dir))):
                self.assertEqual(len(files), self.small_num)
            else:
                self.assertEqual(len(files), 0)
            for name in files:
                md5_set.add(self.get_file_md5(os.path.join(root, name)))
        self.assertSetEqual(md5_set,
                            set(self.big_file.values()).union(set(self.small_files.values())))

        # clean tos files
        for f in uploaded_files_in_tos:
            self.files.delete(f)

        list_df = self.files.list()
        files_in_tos_after_delete = list_df.query(f"key.str.startswith('{self.tos_target_dir}')")[
            "key"]
        self.assertEqual(len(files_in_tos_after_delete), 0)

        # clean downloads files
        shutil.rmtree(self.local_downloads_files_dir)
|
bioos/tests/utils.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import time
|
|
3
|
+
import unittest
|
|
4
|
+
|
|
5
|
+
from bioos.utils.common_tools import SingletonType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Foo(metaclass=SingletonType):
    """Minimal fixture class for exercising SingletonType under concurrency.

    The one-second sleep in ``__init__`` widens the race window so that
    overlapping constructions from multiple threads would expose a
    non-thread-safe singleton implementation.
    """

    def __init__(self, name):
        # Deliberate delay: lets concurrent __init__ calls overlap in tests.
        time.sleep(1)
        self.name = name
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MyThread(threading.Thread):
    """Thread subclass that captures the return value of its target callable."""

    def __init__(self, func, args, name=''):
        super().__init__()
        # The callable to run and its positional arguments.
        self.func = func
        self.args = args
        # Thread name (useful in debugging output); empty by default.
        self.name = name
        # Filled with the callable's return value once run() finishes.
        self.res = None

    def run(self):
        """Execute the target callable and store its result."""
        self.res = self.func(*self.args)

    def get_res(self):
        """Return the captured result (None until the thread has run)."""
        return self.res
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestUtils(unittest.TestCase):
    """Verifies SingletonType: one shared instance per distinct init argument."""

    @classmethod
    def init_foo(cls, arg):
        """Construct (or fetch the cached) Foo keyed by the stringified arg."""
        return Foo(str(arg))

    def test_singletonType_diff_obj(self):
        # Ten threads with ten distinct arguments must yield ten objects.
        workers = [
            MyThread(func=self.init_foo, args=(i,), name=f'diff_obj {i}')
            for i in range(10)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        distinct = {worker.get_res() for worker in workers}
        self.assertEqual(len(distinct), 10)

    def test_singletonType_same_obj(self):
        # One hundred threads racing on the same argument must share one object.
        workers = [
            MyThread(func=self.init_foo, args=(0,), name=f'same_obj {i}')
            for i in range(100)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        distinct = {worker.get_res() for worker in workers}
        self.assertEqual(len(distinct), 1)
|