seqchromloader 0.6.0__tar.gz → 0.6.2__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: seqchromloader
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: Sequence and chromatin dataloader for deep learning
5
5
  Home-page: https://github.com/yztxwd/seqchromloader
6
6
  Author-email: yztxwd@gmail.com
@@ -116,7 +116,8 @@ class _SeqChromDatasetByDataFrame(Dataset):
116
116
  bigwig_filelist:list,
117
117
  target_bam=None,
118
118
  transforms:dict=None,
119
- initialize_first=False):
119
+ initialize_first=False,
120
+ return_region=False):
120
121
 
121
122
  self.dataframe = dataframe
122
123
  self.genome_fasta = genome_fasta
@@ -129,6 +130,8 @@ class _SeqChromDatasetByDataFrame(Dataset):
129
130
  self.transforms = transforms
130
131
 
131
132
  if initialize_first: self.initialize()
133
+
134
+ self.return_region = return_region
132
135
 
133
136
  def initialize(self):
134
137
  # create the stream handler after child processes spawned to enable parallel reading
@@ -158,7 +161,10 @@ class _SeqChromDatasetByDataFrame(Dataset):
158
161
  except utils.BigWigInaccessible as e:
159
162
  raise e
160
163
 
161
- return feature['seq'], feature['chrom'], feature['target'], feature['label']
164
+ if not self.return_region:
165
+ return feature['seq'], feature['chrom'], feature['target'], feature['label']
166
+ else:
167
+ return f'{item.chrom}:{item.start}-{item.end}', feature['seq'], feature['chrom'], feature['target'], feature['label']
162
168
 
163
169
  SeqChromDatasetByDataFrame = seqChromLoaderCurry(_SeqChromDatasetByDataFrame)
164
170
 
@@ -175,14 +181,15 @@ class _SeqChromDatasetByBed(_SeqChromDatasetByDataFrame):
175
181
  :param transforms: A dictionary of functions to transform the output data, accepted keys are *["seq", "chrom", "target", "label"]*
176
182
  :type transforms: dict of functions
177
183
  """
178
- def __init__(self, bed: str, genome_fasta: str, bigwig_filelist:list, target_bam=None, transforms:dict=None, initialize_first=False):
184
+ def __init__(self, bed: str, genome_fasta: str, bigwig_filelist:list, target_bam=None, transforms:dict=None, initialize_first=False, return_region=False):
179
185
  dataframe = pd.read_table(bed, header=None, names=['chrom', 'start', 'end', 'label', 'score', 'strand' ])
180
186
  super().__init__(dataframe,
181
187
  genome_fasta,
182
188
  bigwig_filelist,
183
189
  target_bam,
184
190
  transforms,
185
- initialize_first)
191
+ initialize_first,
192
+ return_region)
186
193
 
187
194
  SeqChromDatasetByBed = seqChromLoaderCurry(_SeqChromDatasetByBed)
188
195
 
@@ -354,9 +354,14 @@ def extract_target(chrom, start, end, strand, target):
354
354
  if isinstance(target, pysam.AlignmentFile):
355
355
  target_array = np.array(target.count(chrom, start, end), dtype=np.float32)[np.newaxis]
356
356
  elif isinstance(target, pyBigWig.pyBigWig):
357
- target_array = np.nan_to_num(target.values(chrom, start, end)).astype(np.float32)
358
- if strand=="-":
359
- target_array = target_array[::-1]
357
+ try:
358
+ target_array = np.nan_to_num(target.values(chrom, start, end)).astype(np.float32)
359
+ if strand=="-":
360
+ target_array = target_array[::-1]
361
+ except RuntimeError as e:
362
+ logging.warning(e)
363
+ logging.warning(f"RuntimeError happened when accessing {chrom}:{start}-{end}, it's probably due to at least one chromatin track bigwig doesn't have information in this region")
364
+ raise BigWigInaccessible(chrom, start, end)
360
365
  else:
361
366
  target_array = None
362
367
  return target_array
@@ -385,4 +390,4 @@ def extract_info(chrom, start, end, label, genome_pyfaidx, bigwigs, target, stra
385
390
  for k,t in transforms.items():
386
391
  feature[k] = t(feature[k])
387
392
 
388
- return feature
393
+ return feature
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: seqchromloader
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: Sequence and chromatin dataloader for deep learning
5
5
  Home-page: https://github.com/yztxwd/seqchromloader
6
6
  Author-email: yztxwd@gmail.com
@@ -20,7 +20,7 @@ setup(
20
20
  # eg: 1.0.0, 1.0.1, 3.0.2, 5.0-beta, etc.
21
21
  # You CANNOT upload two versions of your package with the same version number
22
22
  # This field is REQUIRED
23
- version="0.6.0",
23
+ version="0.6.2",
24
24
 
25
25
  # The packages that constitute your project.
26
26
  # For my project, I have only one - "pydash".
@@ -245,6 +245,23 @@ class Test(unittest.TestCase):
245
245
  self.assertEqual(target[0].item(), 6.0)
246
246
  self.assertEqual(label[1].item(), 1)
247
247
 
248
+ def test_bed_loader_return_region(self):
249
+
250
+ it = iter(SeqChromDatasetByBed(
251
+ bed="data/sample.bed",
252
+ genome_fasta="data/sample.fa",
253
+ bigwig_filelist=["data/sample.bw"],
254
+ target_bam="data/sample.bam",
255
+ transforms={"seq": test_seq_transform,
256
+ "chrom": test_chrom_transform,
257
+ "target": test_target_transform},
258
+ dataloader_kws={"batch_size":2,
259
+ "shuffle":False},
260
+ return_region=True
261
+ ))
262
+ region, seq, chrom, target, label = next(it)
263
+ self.assertEqual(region[0], "chr19:0-5")
264
+
248
265
  def test_lightning_datamodule(self):
249
266
  dm = SeqChromDataModule(
250
267
  train_wds="data/test_0.tar.gz",
@@ -295,4 +312,4 @@ def test_target_transform(target):
295
312
  return target * 3
296
313
 
297
314
  if __name__ == "__main__":
298
- unittest.main(verbosity=2)
315
+ unittest.main(verbosity=2)
File without changes
File without changes