bio-fastqc 0.8.3 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/bio/fastqc/semantics.rb +144 -116
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 22283e04070c96f6335a8e4c281977ca1cc5de5a
4
- data.tar.gz: a211818f6145f370398bb3704daffdbbb512052e
3
+ metadata.gz: 81a63db939c639f1747c86419e286711fa432678
4
+ data.tar.gz: 746a52bdc2acdf9c56a0c943cbbba942682cdaca
5
5
  SHA512:
6
- metadata.gz: fa2fa2843107a62e3bd644e10bd01105d72821609d74021c66fba0d09c417bc23447ffa47797339e24115fe7bcddc88b79e07a0e7f90222429450cdec44065c6
7
- data.tar.gz: 829e6e30c3065bb6fc168af5ce4285ec6a76b061449be016f9341b0136953279ae425a2c0eee6608a1c591d5ba04be146944f1ae01317302b7095982d6f99d85
6
+ metadata.gz: 0fc5416cf49a6d2aebd70359685deb490b4fcbd1f0e786fd45e8fcca6bbf1535bf981ae9b47eaaa5198c5e3d252f28e0cb975c5c11f7436b99748758e815aaa5
7
+ data.tar.gz: f8d9957c91acc8a032e42396246937f1832e1ca0a507e98f49b4e4bd9e5c0673a20da3b9b767b5bc7d334f036309cb513ed19a167e50e1d904c6634ab832adf4
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.3
1
+ 0.9.0
@@ -26,7 +26,7 @@ module Bio
26
26
 
27
27
  def turtle_prefixes
28
28
  {
29
- "uo" => "http://purl.obolibrary.org/obo/",
29
+ "obo" => "http://purl.obolibrary.org/obo/",
30
30
  "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
31
31
  "rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
32
32
  "dcterms" => "http://purl.org/dc/terms/",
@@ -34,11 +34,13 @@ module Bio
34
34
  "foaf" => "http://xmlns.com/foaf/0.1/",
35
35
  "sos" => "http://purl.jp/bio/01/quanto/ontology/sos#",
36
36
  "quanto" => "http://purl.jp/bio/01/quanto/resource/",
37
+ "sio" => "http://semanticscience.org/resource/",
38
+ "xsd" => "http://www.w3.org/2001/XMLSchema#",
37
39
  }
38
40
  end
39
41
 
40
42
  def json_ld_object
41
- object = [object_core, static_value_modules].flatten.inject(&:merge)
43
+ object = [object_core, static_value_modules, object_modules].flatten.inject(&:merge)
42
44
  if !@tiny
43
45
  object["hasMatrix"] = matrix_modules
44
46
  end
@@ -54,7 +56,7 @@ module Bio
54
56
  end
55
57
 
56
58
  def identifier_literal
57
- @id ? @id : "QNT" + @fastqc_object[:filename].split(".")[0]
59
+ @id ? @id : "QNT_" + @fastqc_object[:filename].split(".")[0]
58
60
  end
59
61
 
60
62
  def identifier_uri
@@ -91,21 +93,27 @@ module Bio
91
93
  filename,
92
94
  file_type,
93
95
  encoding,
94
- total_sequences,
95
- filtered_sequences,
96
- #sequence_length,
97
- percent_gc,
98
- total_duplicate_percentage,
99
- min_length,
100
- max_length,
101
- overall_mean_quality_score,
102
- overall_median_quality_score,
103
- overall_n_content,
104
- mean_sequence_length,
105
- median_sequence_length,
106
96
  ]
107
97
  end
108
98
 
99
+ def object_modules
100
+ {
101
+ "sio:SIO_000216" => [
102
+ total_sequences,
103
+ filtered_sequences,
104
+ percent_gc,
105
+ #total_duplicate_percentage,
106
+ min_length,
107
+ max_length,
108
+ overall_mean_quality_score,
109
+ overall_median_quality_score,
110
+ overall_n_content,
111
+ mean_sequence_length,
112
+ median_sequence_length,
113
+ ]
114
+ }
115
+ end
116
+
109
117
  def matrix_modules
110
118
  [
111
119
  per_base_sequence_quality,
@@ -157,40 +165,44 @@ module Bio
157
165
 
158
166
  def total_sequences
159
167
  {
160
- "totalSequences" => {
161
- "@type" => "SequenceReadAmount",
162
- "hasUnit" => "uo:UO_0000244",
163
- "rdf:value" => @fastqc_object[:total_sequences],
164
- }
168
+ "@type" => "totalSequences",
169
+ "sio:SIO_000221" => "obo:UO_0000244",
170
+ "sio:SIO_000300" => {
171
+ "@value" => @fastqc_object[:total_sequences],
172
+ "@type" => "xsd:integer",
173
+ },
165
174
  }
166
175
  end
167
176
 
168
177
  def filtered_sequences
169
178
  {
170
- "filteredSequences" => {
171
- "@type" => "SequenceReadAmount",
172
- "hasUnit" => "uo:UO_0000244",
173
- "rdf:value" => @fastqc_object[:filtered_sequences],
179
+ "@type" => "filteredSequences",
180
+ "sio:SIO_000221" => "obo:UO_0000244",
181
+ "sio:SIO_000300" => {
182
+ "@value" => @fastqc_object[:filtered_sequences],
183
+ "@type" => "xsd:integer",
174
184
  }
175
185
  }
176
186
  end
177
187
 
178
188
  def sequence_length
179
189
  {
180
- "sequenceLength" => {
181
- "@type" => "SequenceReadLength",
182
- "hasUnit" => "uo:UO_0000244",
183
- "rdf:value" => @fastqc_object[:sequence_length],
190
+ "@type" => "SequenceReadLength",
191
+ "sio:SIO_000221" => "obo:UO_0000244",
192
+ "sio:SIO_000300" => {
193
+ "@value" => @fastqc_object[:sequence_length],
194
+ "@type" => "xsd:string",
184
195
  }
185
196
  }
186
197
  end
187
198
 
188
199
  def percent_gc
189
200
  {
190
- "percentGC" => {
191
- "@type" => "BaseRatio",
192
- "hasUnit" => "uo:UO_0000187",
193
- "rdf:value" => @fastqc_object[:percent_gc],
201
+ "@type" => "percentGC",
202
+ "sio:SIO_000221" => "obo:UO_0000187",
203
+ "sio:SIO_000300" => {
204
+ "@value" => @fastqc_object[:percent_gc],
205
+ "@type" => "xsd:decimal",
194
206
  }
195
207
  }
196
208
  end
@@ -221,33 +233,33 @@ module Bio
221
233
  "basePosition" => base,
222
234
  "meanBaseCallQuality" => {
223
235
  "@type" => "PhredQualityScore",
224
- "hasUnit" => "uo:UO_0000189",
225
- "rdf:value" => mean,
236
+ "sio:SIO_000221" => "obo:UO_0000189",
237
+ "sio:SIO_000300" => mean,
226
238
  },
227
239
  "medianBaseCallQuality" => {
228
240
  "@type" => "PhredQualityScore",
229
- "hasUnit" => "uo:UO_0000189",
230
- "rdf:value" => median,
241
+ "sio:SIO_000221" => "obo:UO_0000189",
242
+ "sio:SIO_000300" => median,
231
243
  },
232
244
  "baseCallQualityLowerQuartile" => {
233
245
  "@type" => "PhredQualityScore",
234
- "hasUnit" => "uo:UO_0000189",
235
- "rdf:value" => lower_quartile,
246
+ "sio:SIO_000221" => "obo:UO_0000189",
247
+ "sio:SIO_000300" => lower_quartile,
236
248
  },
237
249
  "baseCallQualityUpperQuartile" => {
238
250
  "@type" => "PhredQualityScore",
239
- "hasUnit" => "uo:UO_0000189",
240
- "rdf:value" => upper_quartile,
251
+ "sio:SIO_000221" => "obo:UO_0000189",
252
+ "sio:SIO_000300" => upper_quartile,
241
253
  },
242
254
  "baseCallQuality10thPercentile" => {
243
255
  "@type" => "PhredQualityScore",
244
- "hasUnit" => "uo:UO_0000189",
245
- "rdf:value" => tenth_percentile,
256
+ "sio:SIO_000221" => "obo:UO_0000189",
257
+ "sio:SIO_000300" => tenth_percentile,
246
258
  },
247
259
  "baseCallQuality90thPercentile" => {
248
260
  "@type" => "PhredQualityScore",
249
- "hasUnit" => "uo:UO_0000189",
250
- "rdf:value" => ninetieth_percentile,
261
+ "sio:SIO_000221" => "obo:UO_0000189",
262
+ "sio:SIO_000300" => ninetieth_percentile,
251
263
  },
252
264
  }
253
265
  end
@@ -273,13 +285,13 @@ module Bio
273
285
  "rowIndex" => i,
274
286
  "baseCallQuality" => {
275
287
  "@type" => "PhredQualityScore",
276
- "hasUnit" => "uo:UO_0000189",
277
- "rdf:value" => quality,
288
+ "sio:SIO_000221" => "obo:UO_0000189",
289
+ "sio:SIO_000300" => quality,
278
290
  },
279
291
  "sequenceReadCount" => {
280
292
  "@type" => "SequenceReadAmount",
281
- "hasUnit" => "uo:UO_0000244",
282
- "rdf:value" => count,
293
+ "sio:SIO_000221" => "obo:UO_0000244",
294
+ "sio:SIO_000300" => count,
283
295
  },
284
296
  }
285
297
  end
@@ -308,23 +320,23 @@ module Bio
308
320
  "basePosition" => base,
309
321
  "percentGuanine" => {
310
322
  "@type" => "BaseRatio",
311
- "hasUnit" => "uo:UO_0000187",
312
- "rdf:value" => guanine,
323
+ "sio:SIO_000221" => "obo:UO_0000187",
324
+ "sio:SIO_000300" => guanine,
313
325
  },
314
326
  "percentAdenine" => {
315
327
  "@type" => "BaseRatio",
316
- "hasUnit" => "uo:UO_0000187",
317
- "rdf:value" => adenine,
328
+ "sio:SIO_000221" => "obo:UO_0000187",
329
+ "sio:SIO_000300" => adenine,
318
330
  },
319
331
  "percentThymine" => {
320
332
  "@type" => "BaseRatio",
321
- "hasUnit" => "uo:UO_0000187",
322
- "rdf:value" => thymine,
333
+ "sio:SIO_000221" => "obo:UO_0000187",
334
+ "sio:SIO_000300" => thymine,
323
335
  },
324
336
  "percentCytosine" => {
325
337
  "@type" => "BaseRatio",
326
- "hasUnit" => "uo:UO_0000187",
327
- "rdf:value" => chytosine,
338
+ "sio:SIO_000221" => "obo:UO_0000187",
339
+ "sio:SIO_000300" => chytosine,
328
340
  },
329
341
  }
330
342
  end
@@ -346,13 +358,13 @@ module Bio
346
358
  "rowIndex" => i,
347
359
  "percentGC" => {
348
360
  "@type" => "BaseRatio",
349
- "hasunit" => "uo:UO_0000187",
350
- "rdf:value" => gc_content,
361
+ "sio:SIO_000221" => "obo:UO_0000187",
362
+ "sio:SIO_000300" => gc_content,
351
363
  },
352
364
  "sequenceReadCount" => {
353
365
  "@type" => "SequenceReadAmount",
354
- "hasUnit" => "uo:UO_0000244",
355
- "rdf:value" => count,
366
+ "sio:SIO_000221" => "obo:UO_0000244",
367
+ "sio:SIO_000300" => count,
356
368
  },
357
369
  }
358
370
  end
@@ -378,8 +390,8 @@ module Bio
378
390
  "basePosition" => base,
379
391
  "nCount" => {
380
392
  "@type" => "BaseRatio",
381
- "hasUnit" => "uo:UO_0000187",
382
- "rdf:value" => n_count,
393
+ "sio:SIO_000221" => "obo:UO_0000187",
394
+ "sio:SIO_000300" => n_count,
383
395
  },
384
396
  }
385
397
  end
@@ -402,13 +414,13 @@ module Bio
402
414
 
403
415
  "sequenceReadLength" => {
404
416
  "@type" => "SequenceReadLength",
405
- "hasUnit" => "uo:UO_0000244",
406
- "rdf:value" => length,
417
+ "sio:SIO_000221" => "obo:UO_0000244",
418
+ "sio:SIO_000300" => length,
407
419
  },
408
420
  "sequenceReadCount" => {
409
421
  "@type" => "SequenceReadAmount",
410
- "hasUnit" => "uo:UO_0000244",
411
- "rdf:value" => count,
422
+ "sio:SIO_000221" => "obo:UO_0000244",
423
+ "sio:SIO_000300" => count,
412
424
  },
413
425
  }
414
426
  end
@@ -435,13 +447,13 @@ module Bio
435
447
 
436
448
  "sequenceDuplicationLevel" => {
437
449
  "@type" => "SequenceDuplicationLevel",
438
- "hasUnit" => "uo:UO_0000189",
439
- "rdf:value" => duplication_level,
450
+ "sio:SIO_000221" => "obo:UO_0000189",
451
+ "sio:SIO_000300" => duplication_level,
440
452
  },
441
453
  "sequenceReadRelativeCount" => {
442
454
  "@type" => "SequenceReadAmount",
443
- "hasUnit" => "uo:UO_0000244",
444
- "rdf:value" => relative_count,
455
+ "sio:SIO_000221" => "obo:UO_0000244",
456
+ "sio:SIO_000300" => relative_count,
445
457
  },
446
458
  }
447
459
  end
@@ -466,13 +478,13 @@ module Bio
466
478
  "overrepresentedSequence" => sequence,
467
479
  "sequenceReadCount" => {
468
480
  "@type" => "SequenceReadAmount",
469
- "hasUnit" => "uo:UO_0000244",
470
- "rdf:value" => count,
481
+ "sio:SIO_000221" => "obo:UO_0000244",
482
+ "sio:SIO_000300" => count,
471
483
  },
472
484
  "sequenceReadPercentage" => {
473
485
  "@type" => "SequenceReadRatio",
474
- "hasUnit" => "uo:UO_0000187",
475
- "rdf:value" => percentage,
486
+ "sio:SIO_000221" => "obo:UO_0000187",
487
+ "sio:SIO_000300" => percentage,
476
488
  },
477
489
  "possibleSourceOfSequence" => possible_source,
478
490
  }
@@ -503,18 +515,18 @@ module Bio
503
515
  "kmerSequence" => sequence,
504
516
  "sequenceReadCount" => {
505
517
  "@type" => "SequenceReadAmount",
506
- "hasUnit" => "uo:UO_0000244",
507
- "rdf:value" => count,
518
+ "sio:SIO_000221" => "obo:UO_0000244",
519
+ "sio:SIO_000300" => count,
508
520
  },
509
521
  "observedPerExpectedOverall" => {
510
522
  "@type" => "SequenceReadAmount",
511
- "hasUnit" => "uo:Ratio",
512
- "rdf:value" => ratio_overall,
523
+ "sio:SIO_000221" => "obo:Ratio",
524
+ "sio:SIO_000300" => ratio_overall,
513
525
  },
514
526
  "observedPerExpectedMax" => {
515
527
  "@type" => "SequenceReadAmount",
516
- "hasUnit" => "uo:Ratio",
517
- "rdf:value" => ratio_max,
528
+ "sio:SIO_000221" => "obo:Ratio",
529
+ "sio:SIO_000300" => ratio_max,
518
530
  },
519
531
  "observedPerExpectedMaxPosition" => ratio_max_position,
520
532
  }
@@ -523,71 +535,78 @@ module Bio
523
535
 
524
536
  def min_length
525
537
  {
526
- "minSequenceLength" => {
527
- "@type" => "SequenceReadLength",
528
- "hasUnit" => "uo:UO_0000244",
529
- "rdf:value" => @fastqc_object[:min_length],
530
- }
538
+ "@type" => "minimumSequenceLength",
539
+ "sio:SIO_000221" => "obo:UO_0000244",
540
+ "sio:SIO_000300" => {
541
+ "@value" => @fastqc_object[:min_length],
542
+ "@type" => "xsd:integer",
543
+ },
531
544
  }
532
545
  end
533
546
 
534
547
  def max_length
535
548
  {
536
- "maxSequenceLength" => {
537
- "@type" => "SequenceReadLength",
538
- "hasUnit" => "uo:UO_0000244",
539
- "rdf:value" => @fastqc_object[:max_length],
540
- }
549
+ "@type" => "maxSequenceLength",
550
+ "sio:SIO_000221" => "obo:UO_0000244",
551
+ "sio:SIO_000300" => {
552
+ "@value" => @fastqc_object[:max_length],
553
+ "@type" => "xsd:integer",
554
+ },
541
555
  }
542
556
  end
543
557
 
544
558
  def mean_sequence_length
545
559
  {
546
- "meanSequenceLength" => {
547
- "@type" => "SequenceReadLength",
548
- "hasUnit" => "uo:UO_0000244",
549
- "rdf:value" => @fastqc_object[:mean_sequence_length],
550
- }
560
+ "@type" => "meanSequenceLength",
561
+ "sio:SIO_000221" => "obo:UO_0000244",
562
+ "sio:SIO_000300" => {
563
+ "@value" => @fastqc_object[:mean_sequence_length],
564
+ "@type" => "xsd:decimal",
565
+ },
551
566
  }
552
567
  end
553
568
 
554
569
  def median_sequence_length
555
570
  {
556
- "medianSequenceLength" => {
557
- "@type" => "SequenceReadLength",
558
- "hasUnit" => "uo:UO_0000244",
559
- "rdf:value" => @fastqc_object[:median_sequence_length],
560
- }
571
+ "@type" => "medianSequenceLength",
572
+ "sio:SIO_000221" => "obo:UO_0000244",
573
+ "sio:SIO_000300" => {
574
+ "@value" => @fastqc_object[:median_sequence_length],
575
+ "@type" => "xsd:decimal",
576
+ },
561
577
  }
562
578
  end
563
579
 
564
580
  def overall_mean_quality_score
565
581
  {
566
- "overallMeanBaseCallQuality" => {
567
- "@type" => "PhredQualityScore",
568
- "hasUnit" => "uo:UO_0000189",
569
- "rdf:value" => @fastqc_object[:overall_mean_quality_score],
570
- }
582
+ "@type" => "meanBaseCallQuality",
583
+ "sio:SIO_000221" => "obo:UO_0000189",
584
+ "sio:SIO_000300" => {
585
+ "@value" => @fastqc_object[:overall_mean_quality_score],
586
+ "@type" => "xsd:decimal",
587
+ },
571
588
  }
572
589
  end
573
590
 
574
591
  def overall_median_quality_score
575
592
  {
576
- "overallMedianBaseCallQuality" => {
577
- "@type" => "PhredQualityScore",
578
- "hasUnit" => "uo:UO_0000189",
579
- "rdf:value" => @fastqc_object[:overall_median_quality_score],
580
- }
593
+ "@type" => "medianBaseCallQuality",
594
+ "sio:SIO_000221" => "obo:UO_0000189",
595
+ "sio:SIO_000300" => {
596
+ "@value" => @fastqc_object[:overall_median_quality_score],
597
+ "@type" => "xsd:decimal",
598
+ },
581
599
  }
582
600
  end
583
601
 
584
602
  def overall_n_content
585
603
  {
586
- "overallNContent" => {
587
- "@type" => "BaseRatio",
588
- "hasUnit" => "uo:UO_0000187",
589
- "rdf:value" => @fastqc_object[:overall_n_content],
590
- }
604
+ "@type" => "nContent",
605
+ "sio:SIO_000221" => "obo:UO_0000187",
606
+ "sio:SIO_000300" => {
607
+ "@value" => @fastqc_object[:overall_n_content],
608
+ "@type" => "xsd:decimal",
609
+ },
591
610
  }
592
611
  end
593
612
 
@@ -682,6 +701,16 @@ module Bio
682
701
  "SequenceReadRatio",
683
702
  "SequenceReadLength",
684
703
  "SequenceDuplicationLevel",
704
+ "nContent",
705
+ "percentGC",
706
+ "medianBaseCallQuality",
707
+ "meanBaseCallQuality",
708
+ "totalSequences",
709
+ "filteredSequences",
710
+ "minimumSequenceLength",
711
+ "maxSequenceLength",
712
+ "meanSequenceLength",
713
+ "medianSequenceLength",
685
714
  ]
686
715
  end
687
716
 
@@ -715,7 +744,6 @@ module Bio
715
744
  "sequenceReadLength",
716
745
  "sequenceReadPercentage",
717
746
  "sequenceReadRelativeCount",
718
- "hasUnit",
719
747
  "overallMeanBaseCallQuality",
720
748
  "overallMedianBaseCallQuality",
721
749
  "overallNContent",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-fastqc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tazro Inutano Ohta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-29 00:00:00.000000000 Z
11
+ date: 2016-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip