bio-fastqc 0.8.3 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/bio/fastqc/semantics.rb +144 -116
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81a63db939c639f1747c86419e286711fa432678
|
4
|
+
data.tar.gz: 746a52bdc2acdf9c56a0c943cbbba942682cdaca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0fc5416cf49a6d2aebd70359685deb490b4fcbd1f0e786fd45e8fcca6bbf1535bf981ae9b47eaaa5198c5e3d252f28e0cb975c5c11f7436b99748758e815aaa5
|
7
|
+
data.tar.gz: f8d9957c91acc8a032e42396246937f1832e1ca0a507e98f49b4e4bd9e5c0673a20da3b9b767b5bc7d334f036309cb513ed19a167e50e1d904c6634ab832adf4
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.9.0
|
data/lib/bio/fastqc/semantics.rb
CHANGED
@@ -26,7 +26,7 @@ module Bio
|
|
26
26
|
|
27
27
|
def turtle_prefixes
|
28
28
|
{
|
29
|
-
"
|
29
|
+
"obo" => "http://purl.obolibrary.org/obo/",
|
30
30
|
"rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
31
31
|
"rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
|
32
32
|
"dcterms" => "http://purl.org/dc/terms/",
|
@@ -34,11 +34,13 @@ module Bio
|
|
34
34
|
"foaf" => "http://xmlns.com/foaf/0.1/",
|
35
35
|
"sos" => "http://purl.jp/bio/01/quanto/ontology/sos#",
|
36
36
|
"quanto" => "http://purl.jp/bio/01/quanto/resource/",
|
37
|
+
"sio" => "http://semanticscience.org/resource/",
|
38
|
+
"xsd" => "http://www.w3.org/2001/XMLSchema#",
|
37
39
|
}
|
38
40
|
end
|
39
41
|
|
40
42
|
def json_ld_object
|
41
|
-
object = [object_core, static_value_modules].flatten.inject(&:merge)
|
43
|
+
object = [object_core, static_value_modules, object_modules].flatten.inject(&:merge)
|
42
44
|
if !@tiny
|
43
45
|
object["hasMatrix"] = matrix_modules
|
44
46
|
end
|
@@ -54,7 +56,7 @@ module Bio
|
|
54
56
|
end
|
55
57
|
|
56
58
|
def identifier_literal
|
57
|
-
@id ? @id : "
|
59
|
+
@id ? @id : "QNT_" + @fastqc_object[:filename].split(".")[0]
|
58
60
|
end
|
59
61
|
|
60
62
|
def identifier_uri
|
@@ -91,21 +93,27 @@ module Bio
|
|
91
93
|
filename,
|
92
94
|
file_type,
|
93
95
|
encoding,
|
94
|
-
total_sequences,
|
95
|
-
filtered_sequences,
|
96
|
-
#sequence_length,
|
97
|
-
percent_gc,
|
98
|
-
total_duplicate_percentage,
|
99
|
-
min_length,
|
100
|
-
max_length,
|
101
|
-
overall_mean_quality_score,
|
102
|
-
overall_median_quality_score,
|
103
|
-
overall_n_content,
|
104
|
-
mean_sequence_length,
|
105
|
-
median_sequence_length,
|
106
96
|
]
|
107
97
|
end
|
108
98
|
|
99
|
+
def object_modules
|
100
|
+
{
|
101
|
+
"sio:SIO_000216" => [
|
102
|
+
total_sequences,
|
103
|
+
filtered_sequences,
|
104
|
+
percent_gc,
|
105
|
+
#total_duplicate_percentage,
|
106
|
+
min_length,
|
107
|
+
max_length,
|
108
|
+
overall_mean_quality_score,
|
109
|
+
overall_median_quality_score,
|
110
|
+
overall_n_content,
|
111
|
+
mean_sequence_length,
|
112
|
+
median_sequence_length,
|
113
|
+
]
|
114
|
+
}
|
115
|
+
end
|
116
|
+
|
109
117
|
def matrix_modules
|
110
118
|
[
|
111
119
|
per_base_sequence_quality,
|
@@ -157,40 +165,44 @@ module Bio
|
|
157
165
|
|
158
166
|
def total_sequences
|
159
167
|
{
|
160
|
-
"
|
161
|
-
|
162
|
-
|
163
|
-
"
|
164
|
-
|
168
|
+
"@type" => "totalSequences",
|
169
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
170
|
+
"sio:SIO_000300" => {
|
171
|
+
"@value" => @fastqc_object[:total_sequences],
|
172
|
+
"@type" => "xsd:integer",
|
173
|
+
},
|
165
174
|
}
|
166
175
|
end
|
167
176
|
|
168
177
|
def filtered_sequences
|
169
178
|
{
|
170
|
-
"
|
171
|
-
|
172
|
-
|
173
|
-
"
|
179
|
+
"@type" => "filteredSequences",
|
180
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
181
|
+
"sio:SIO_000300" => {
|
182
|
+
"@value" => @fastqc_object[:filtered_sequences],
|
183
|
+
"@type" => "xsd:integer",
|
174
184
|
}
|
175
185
|
}
|
176
186
|
end
|
177
187
|
|
178
188
|
def sequence_length
|
179
189
|
{
|
180
|
-
"
|
181
|
-
|
182
|
-
|
183
|
-
"
|
190
|
+
"@type" => "SequenceReadLength",
|
191
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
192
|
+
"sio:SIO_000300" => {
|
193
|
+
"@value" => @fastqc_object[:sequence_length],
|
194
|
+
"@type" => "xsd:string",
|
184
195
|
}
|
185
196
|
}
|
186
197
|
end
|
187
198
|
|
188
199
|
def percent_gc
|
189
200
|
{
|
190
|
-
"
|
191
|
-
|
192
|
-
|
193
|
-
"
|
201
|
+
"@type" => "percentGC",
|
202
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
203
|
+
"sio:SIO_000300" => {
|
204
|
+
"@value" => @fastqc_object[:percent_gc],
|
205
|
+
"@type" => "xsd:decimal",
|
194
206
|
}
|
195
207
|
}
|
196
208
|
end
|
@@ -221,33 +233,33 @@ module Bio
|
|
221
233
|
"basePosition" => base,
|
222
234
|
"meanBaseCallQuality" => {
|
223
235
|
"@type" => "PhredQualityScore",
|
224
|
-
"
|
225
|
-
"
|
236
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
237
|
+
"sio:SIO_000300" => mean,
|
226
238
|
},
|
227
239
|
"medianBaseCallQuality" => {
|
228
240
|
"@type" => "PhredQualityScore",
|
229
|
-
"
|
230
|
-
"
|
241
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
242
|
+
"sio:SIO_000300" => median,
|
231
243
|
},
|
232
244
|
"baseCallQualityLowerQuartile" => {
|
233
245
|
"@type" => "PhredQualityScore",
|
234
|
-
"
|
235
|
-
"
|
246
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
247
|
+
"sio:SIO_000300" => lower_quartile,
|
236
248
|
},
|
237
249
|
"baseCallQualityUpperQuartile" => {
|
238
250
|
"@type" => "PhredQualityScore",
|
239
|
-
"
|
240
|
-
"
|
251
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
252
|
+
"sio:SIO_000300" => upper_quartile,
|
241
253
|
},
|
242
254
|
"baseCallQuality10thPercentile" => {
|
243
255
|
"@type" => "PhredQualityScore",
|
244
|
-
"
|
245
|
-
"
|
256
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
257
|
+
"sio:SIO_000300" => tenth_percentile,
|
246
258
|
},
|
247
259
|
"baseCallQuality90thPercentile" => {
|
248
260
|
"@type" => "PhredQualityScore",
|
249
|
-
"
|
250
|
-
"
|
261
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
262
|
+
"sio:SIO_000300" => ninetieth_percentile,
|
251
263
|
},
|
252
264
|
}
|
253
265
|
end
|
@@ -273,13 +285,13 @@ module Bio
|
|
273
285
|
"rowIndex" => i,
|
274
286
|
"baseCallQuality" => {
|
275
287
|
"@type" => "PhredQualityScore",
|
276
|
-
"
|
277
|
-
"
|
288
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
289
|
+
"sio:SIO_000300" => quality,
|
278
290
|
},
|
279
291
|
"sequenceReadCount" => {
|
280
292
|
"@type" => "SequenceReadAmount",
|
281
|
-
"
|
282
|
-
"
|
293
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
294
|
+
"sio:SIO_000300" => count,
|
283
295
|
},
|
284
296
|
}
|
285
297
|
end
|
@@ -308,23 +320,23 @@ module Bio
|
|
308
320
|
"basePosition" => base,
|
309
321
|
"percentGuanine" => {
|
310
322
|
"@type" => "BaseRatio",
|
311
|
-
"
|
312
|
-
"
|
323
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
324
|
+
"sio:SIO_000300" => guanine,
|
313
325
|
},
|
314
326
|
"percentAdenine" => {
|
315
327
|
"@type" => "BaseRatio",
|
316
|
-
"
|
317
|
-
"
|
328
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
329
|
+
"sio:SIO_000300" => adenine,
|
318
330
|
},
|
319
331
|
"percentThymine" => {
|
320
332
|
"@type" => "BaseRatio",
|
321
|
-
"
|
322
|
-
"
|
333
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
334
|
+
"sio:SIO_000300" => thymine,
|
323
335
|
},
|
324
336
|
"percentCytosine" => {
|
325
337
|
"@type" => "BaseRatio",
|
326
|
-
"
|
327
|
-
"
|
338
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
339
|
+
"sio:SIO_000300" => chytosine,
|
328
340
|
},
|
329
341
|
}
|
330
342
|
end
|
@@ -346,13 +358,13 @@ module Bio
|
|
346
358
|
"rowIndex" => i,
|
347
359
|
"percentGC" => {
|
348
360
|
"@type" => "BaseRatio",
|
349
|
-
"
|
350
|
-
"
|
361
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
362
|
+
"sio:SIO_000300" => gc_content,
|
351
363
|
},
|
352
364
|
"sequenceReadCount" => {
|
353
365
|
"@type" => "SequenceReadAmount",
|
354
|
-
"
|
355
|
-
"
|
366
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
367
|
+
"sio:SIO_000300" => count,
|
356
368
|
},
|
357
369
|
}
|
358
370
|
end
|
@@ -378,8 +390,8 @@ module Bio
|
|
378
390
|
"basePosition" => base,
|
379
391
|
"nCount" => {
|
380
392
|
"@type" => "BaseRatio",
|
381
|
-
"
|
382
|
-
"
|
393
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
394
|
+
"sio:SIO_000300" => n_count,
|
383
395
|
},
|
384
396
|
}
|
385
397
|
end
|
@@ -402,13 +414,13 @@ module Bio
|
|
402
414
|
|
403
415
|
"sequenceReadLength" => {
|
404
416
|
"@type" => "SequenceReadLength",
|
405
|
-
"
|
406
|
-
"
|
417
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
418
|
+
"sio:SIO_000300" => length,
|
407
419
|
},
|
408
420
|
"sequenceReadCount" => {
|
409
421
|
"@type" => "SequenceReadAmount",
|
410
|
-
"
|
411
|
-
"
|
422
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
423
|
+
"sio:SIO_000300" => count,
|
412
424
|
},
|
413
425
|
}
|
414
426
|
end
|
@@ -435,13 +447,13 @@ module Bio
|
|
435
447
|
|
436
448
|
"sequenceDuplicationLevel" => {
|
437
449
|
"@type" => "SequenceDuplicationLevel",
|
438
|
-
"
|
439
|
-
"
|
450
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
451
|
+
"sio:SIO_000300" => duplication_level,
|
440
452
|
},
|
441
453
|
"sequenceReadRelativeCount" => {
|
442
454
|
"@type" => "SequenceReadAmount",
|
443
|
-
"
|
444
|
-
"
|
455
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
456
|
+
"sio:SIO_000300" => relative_count,
|
445
457
|
},
|
446
458
|
}
|
447
459
|
end
|
@@ -466,13 +478,13 @@ module Bio
|
|
466
478
|
"overrepresentedSequence" => sequence,
|
467
479
|
"sequenceReadCount" => {
|
468
480
|
"@type" => "SequenceReadAmount",
|
469
|
-
"
|
470
|
-
"
|
481
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
482
|
+
"sio:SIO_000300" => count,
|
471
483
|
},
|
472
484
|
"sequenceReadPercentage" => {
|
473
485
|
"@type" => "SequenceReadRatio",
|
474
|
-
"
|
475
|
-
"
|
486
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
487
|
+
"sio:SIO_000300" => percentage,
|
476
488
|
},
|
477
489
|
"possibleSourceOfSequence" => possible_source,
|
478
490
|
}
|
@@ -503,18 +515,18 @@ module Bio
|
|
503
515
|
"kmerSequence" => sequence,
|
504
516
|
"sequenceReadCount" => {
|
505
517
|
"@type" => "SequenceReadAmount",
|
506
|
-
"
|
507
|
-
"
|
518
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
519
|
+
"sio:SIO_000300" => count,
|
508
520
|
},
|
509
521
|
"observedPerExpectedOverall" => {
|
510
522
|
"@type" => "SequenceReadAmount",
|
511
|
-
"
|
512
|
-
"
|
523
|
+
"sio:SIO_000221" => "obo:Ratio",
|
524
|
+
"sio:SIO_000300" => ratio_overall,
|
513
525
|
},
|
514
526
|
"observedPerExpectedMax" => {
|
515
527
|
"@type" => "SequenceReadAmount",
|
516
|
-
"
|
517
|
-
"
|
528
|
+
"sio:SIO_000221" => "obo:Ratio",
|
529
|
+
"sio:SIO_000300" => ratio_max,
|
518
530
|
},
|
519
531
|
"observedPerExpectedMaxPosition" => ratio_max_position,
|
520
532
|
}
|
@@ -523,71 +535,78 @@ module Bio
|
|
523
535
|
|
524
536
|
def min_length
|
525
537
|
{
|
526
|
-
"
|
527
|
-
|
528
|
-
|
529
|
-
"
|
530
|
-
|
538
|
+
"@type" => "minimumSequenceLength",
|
539
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
540
|
+
"sio:SIO_000300" => {
|
541
|
+
"@value" => @fastqc_object[:min_length],
|
542
|
+
"@type" => "xsd:integer",
|
543
|
+
},
|
531
544
|
}
|
532
545
|
end
|
533
546
|
|
534
547
|
def max_length
|
535
548
|
{
|
536
|
-
"
|
537
|
-
|
538
|
-
|
539
|
-
"
|
540
|
-
|
549
|
+
"@type" => "maxSequenceLength",
|
550
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
551
|
+
"sio:SIO_000300" => {
|
552
|
+
"@value" => @fastqc_object[:max_length],
|
553
|
+
"@type" => "xsd:integer",
|
554
|
+
},
|
541
555
|
}
|
542
556
|
end
|
543
557
|
|
544
558
|
def mean_sequence_length
|
545
559
|
{
|
546
|
-
"
|
547
|
-
|
548
|
-
|
549
|
-
"
|
550
|
-
|
560
|
+
"@type" => "meanSequenceLength",
|
561
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
562
|
+
"sio:SIO_000300" => {
|
563
|
+
"@value" => @fastqc_object[:mean_sequence_length],
|
564
|
+
"@type" => "xsd:decimal",
|
565
|
+
},
|
551
566
|
}
|
552
567
|
end
|
553
568
|
|
554
569
|
def median_sequence_length
|
555
570
|
{
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
"
|
560
|
-
|
571
|
+
"@type" => "medianSequenceLength",
|
572
|
+
"sio:SIO_000221" => "obo:UO_0000244",
|
573
|
+
"sio:SIO_000300" => {
|
574
|
+
"@value" => @fastqc_object[:median_sequence_length],
|
575
|
+
"@type" => "xsd:decimal",
|
576
|
+
},
|
561
577
|
}
|
562
578
|
end
|
563
579
|
|
564
580
|
def overall_mean_quality_score
|
565
581
|
{
|
566
|
-
"
|
567
|
-
|
568
|
-
|
569
|
-
"
|
570
|
-
|
582
|
+
"@type" => "meanBaseCallQuality",
|
583
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
584
|
+
"sio:SIO_000300" => {
|
585
|
+
"@value" => @fastqc_object[:overall_mean_quality_score],
|
586
|
+
"@type" => "xsd:decimal",
|
587
|
+
},
|
571
588
|
}
|
572
589
|
end
|
573
590
|
|
574
591
|
def overall_median_quality_score
|
575
592
|
{
|
576
|
-
"
|
577
|
-
|
578
|
-
|
579
|
-
"
|
580
|
-
|
593
|
+
"@type" => "medianBaseCallQuality",
|
594
|
+
"sio:SIO_000221" => "obo:UO_0000189",
|
595
|
+
"sio:SIO_000300" => {
|
596
|
+
"@value" => @fastqc_object[:overall_median_quality_score],
|
597
|
+
"@type" => "xsd:decimal",
|
598
|
+
},
|
581
599
|
}
|
582
600
|
end
|
583
601
|
|
584
602
|
def overall_n_content
|
585
603
|
{
|
586
|
-
"
|
587
|
-
|
588
|
-
|
589
|
-
"
|
590
|
-
|
604
|
+
"@type" => "nContent",
|
605
|
+
"sio:SIO_000221" => "obo:UO_0000187",
|
606
|
+
"sio:SIO_000300" => {
|
607
|
+
"@value" => @fastqc_object[:overall_n_content],
|
608
|
+
"@type" => "xsd:decimal",
|
609
|
+
},
|
591
610
|
}
|
592
611
|
end
|
593
612
|
|
@@ -682,6 +701,16 @@ module Bio
|
|
682
701
|
"SequenceReadRatio",
|
683
702
|
"SequenceReadLength",
|
684
703
|
"SequenceDuplicationLevel",
|
704
|
+
"nContent",
|
705
|
+
"percentGC",
|
706
|
+
"medianBaseCallQuality",
|
707
|
+
"meanBaseCallQuality",
|
708
|
+
"totalSequences",
|
709
|
+
"filteredSequences",
|
710
|
+
"minimumSequenceLength",
|
711
|
+
"maxSequenceLength",
|
712
|
+
"meanSequenceLength",
|
713
|
+
"medianSequenceLength",
|
685
714
|
]
|
686
715
|
end
|
687
716
|
|
@@ -715,7 +744,6 @@ module Bio
|
|
715
744
|
"sequenceReadLength",
|
716
745
|
"sequenceReadPercentage",
|
717
746
|
"sequenceReadRelativeCount",
|
718
|
-
"hasUnit",
|
719
747
|
"overallMeanBaseCallQuality",
|
720
748
|
"overallMedianBaseCallQuality",
|
721
749
|
"overallNContent",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-fastqc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tazro Inutano Ohta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|