seqchromloader 0.6.0__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/PKG-INFO +1 -1
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader/loader.py +11 -4
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader/utils.py +9 -4
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader.egg-info/PKG-INFO +1 -1
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/setup.py +1 -1
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/tests/test_writer_loader.py +18 -1
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/README.md +0 -0
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader/__init__.py +0 -0
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader/writer.py +0 -0
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader.egg-info/SOURCES.txt +0 -0
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader.egg-info/dependency_links.txt +0 -0
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader.egg-info/requires.txt +0 -0
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/seqchromloader.egg-info/top_level.txt +0 -0
- {seqchromloader-0.6.0 → seqchromloader-0.6.2}/setup.cfg +0 -0
|
@@ -116,7 +116,8 @@ class _SeqChromDatasetByDataFrame(Dataset):
|
|
|
116
116
|
bigwig_filelist:list,
|
|
117
117
|
target_bam=None,
|
|
118
118
|
transforms:dict=None,
|
|
119
|
-
initialize_first=False):
|
|
119
|
+
initialize_first=False,
|
|
120
|
+
return_region=False):
|
|
120
121
|
|
|
121
122
|
self.dataframe = dataframe
|
|
122
123
|
self.genome_fasta = genome_fasta
|
|
@@ -129,6 +130,8 @@ class _SeqChromDatasetByDataFrame(Dataset):
|
|
|
129
130
|
self.transforms = transforms
|
|
130
131
|
|
|
131
132
|
if initialize_first: self.initialize()
|
|
133
|
+
|
|
134
|
+
self.return_region = return_region
|
|
132
135
|
|
|
133
136
|
def initialize(self):
|
|
134
137
|
# create the stream handler after child processes spawned to enable parallel reading
|
|
@@ -158,7 +161,10 @@ class _SeqChromDatasetByDataFrame(Dataset):
|
|
|
158
161
|
except utils.BigWigInaccessible as e:
|
|
159
162
|
raise e
|
|
160
163
|
|
|
161
|
-
|
|
164
|
+
if not self.return_region:
|
|
165
|
+
return feature['seq'], feature['chrom'], feature['target'], feature['label']
|
|
166
|
+
else:
|
|
167
|
+
return f'{item.chrom}:{item.start}-{item.end}', feature['seq'], feature['chrom'], feature['target'], feature['label']
|
|
162
168
|
|
|
163
169
|
SeqChromDatasetByDataFrame = seqChromLoaderCurry(_SeqChromDatasetByDataFrame)
|
|
164
170
|
|
|
@@ -175,14 +181,15 @@ class _SeqChromDatasetByBed(_SeqChromDatasetByDataFrame):
|
|
|
175
181
|
:param transforms: A dictionary of functions to transform the output data, accepted keys are *["seq", "chrom", "target", "label"]*
|
|
176
182
|
:type transforms: dict of functions
|
|
177
183
|
"""
|
|
178
|
-
def __init__(self, bed: str, genome_fasta: str, bigwig_filelist:list, target_bam=None, transforms:dict=None, initialize_first=False):
|
|
184
|
+
def __init__(self, bed: str, genome_fasta: str, bigwig_filelist:list, target_bam=None, transforms:dict=None, initialize_first=False, return_region=False):
|
|
179
185
|
dataframe = pd.read_table(bed, header=None, names=['chrom', 'start', 'end', 'label', 'score', 'strand' ])
|
|
180
186
|
super().__init__(dataframe,
|
|
181
187
|
genome_fasta,
|
|
182
188
|
bigwig_filelist,
|
|
183
189
|
target_bam,
|
|
184
190
|
transforms,
|
|
185
|
-
initialize_first)
|
|
191
|
+
initialize_first,
|
|
192
|
+
return_region)
|
|
186
193
|
|
|
187
194
|
SeqChromDatasetByBed = seqChromLoaderCurry(_SeqChromDatasetByBed)
|
|
188
195
|
|
|
@@ -354,9 +354,14 @@ def extract_target(chrom, start, end, strand, target):
|
|
|
354
354
|
if isinstance(target, pysam.AlignmentFile):
|
|
355
355
|
target_array = np.array(target.count(chrom, start, end), dtype=np.float32)[np.newaxis]
|
|
356
356
|
elif isinstance(target, pyBigWig.pyBigWig):
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
357
|
+
try:
|
|
358
|
+
target_array = np.nan_to_num(target.values(chrom, start, end)).astype(np.float32)
|
|
359
|
+
if strand=="-":
|
|
360
|
+
target_array = target_array[::-1]
|
|
361
|
+
except RuntimeError as e:
|
|
362
|
+
logging.warning(e)
|
|
363
|
+
logging.warning(f"RuntimeError happened when accessing {chrom}:{start}-{end}, it's probably due to at least one chromatin track bigwig doesn't have information in this region")
|
|
364
|
+
raise BigWigInaccessible(chrom, start, end)
|
|
360
365
|
else:
|
|
361
366
|
target_array = None
|
|
362
367
|
return target_array
|
|
@@ -385,4 +390,4 @@ def extract_info(chrom, start, end, label, genome_pyfaidx, bigwigs, target, stra
|
|
|
385
390
|
for k,t in transforms.items():
|
|
386
391
|
feature[k] = t(feature[k])
|
|
387
392
|
|
|
388
|
-
return feature
|
|
393
|
+
return feature
|
|
@@ -20,7 +20,7 @@ setup(
|
|
|
20
20
|
# eg: 1.0.0, 1.0.1, 3.0.2, 5.0-beta, etc.
|
|
21
21
|
# You CANNOT upload two versions of your package with the same version number
|
|
22
22
|
# This field is REQUIRED
|
|
23
|
-
version="0.6.0",
|
|
23
|
+
version="0.6.2",
|
|
24
24
|
|
|
25
25
|
# The packages that constitute your project.
|
|
26
26
|
# For my project, I have only one - "pydash".
|
|
@@ -245,6 +245,23 @@ class Test(unittest.TestCase):
|
|
|
245
245
|
self.assertEqual(target[0].item(), 6.0)
|
|
246
246
|
self.assertEqual(label[1].item(), 1)
|
|
247
247
|
|
|
248
|
+
def test_bed_loader_return_region(self):
|
|
249
|
+
|
|
250
|
+
it = iter(SeqChromDatasetByBed(
|
|
251
|
+
bed="data/sample.bed",
|
|
252
|
+
genome_fasta="data/sample.fa",
|
|
253
|
+
bigwig_filelist=["data/sample.bw"],
|
|
254
|
+
target_bam="data/sample.bam",
|
|
255
|
+
transforms={"seq": test_seq_transform,
|
|
256
|
+
"chrom": test_chrom_transform,
|
|
257
|
+
"target": test_target_transform},
|
|
258
|
+
dataloader_kws={"batch_size":2,
|
|
259
|
+
"shuffle":False},
|
|
260
|
+
return_region=True
|
|
261
|
+
))
|
|
262
|
+
region, seq, chrom, target, label = next(it)
|
|
263
|
+
self.assertEqual(region[0], "chr19:0-5")
|
|
264
|
+
|
|
248
265
|
def test_lightning_datamodule(self):
|
|
249
266
|
dm = SeqChromDataModule(
|
|
250
267
|
train_wds="data/test_0.tar.gz",
|
|
@@ -295,4 +312,4 @@ def test_target_transform(target):
|
|
|
295
312
|
return target * 3
|
|
296
313
|
|
|
297
314
|
if __name__ == "__main__":
|
|
298
|
-
unittest.main(verbosity=2)
|
|
315
|
+
unittest.main(verbosity=2)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|