bio-fastqc 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/VERSION +1 -1
  3. data/lib/bio/fastqc/semantics.rb +144 -116
  4. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 22283e04070c96f6335a8e4c281977ca1cc5de5a
4
- data.tar.gz: a211818f6145f370398bb3704daffdbbb512052e
3
+ metadata.gz: 81a63db939c639f1747c86419e286711fa432678
4
+ data.tar.gz: 746a52bdc2acdf9c56a0c943cbbba942682cdaca
5
5
  SHA512:
6
- metadata.gz: fa2fa2843107a62e3bd644e10bd01105d72821609d74021c66fba0d09c417bc23447ffa47797339e24115fe7bcddc88b79e07a0e7f90222429450cdec44065c6
7
- data.tar.gz: 829e6e30c3065bb6fc168af5ce4285ec6a76b061449be016f9341b0136953279ae425a2c0eee6608a1c591d5ba04be146944f1ae01317302b7095982d6f99d85
6
+ metadata.gz: 0fc5416cf49a6d2aebd70359685deb490b4fcbd1f0e786fd45e8fcca6bbf1535bf981ae9b47eaaa5198c5e3d252f28e0cb975c5c11f7436b99748758e815aaa5
7
+ data.tar.gz: f8d9957c91acc8a032e42396246937f1832e1ca0a507e98f49b4e4bd9e5c0673a20da3b9b767b5bc7d334f036309cb513ed19a167e50e1d904c6634ab832adf4
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.3
1
+ 0.9.0
data/lib/bio/fastqc/semantics.rb CHANGED
@@ -26,7 +26,7 @@ module Bio
26
26
 
27
27
  def turtle_prefixes
28
28
  {
29
- "uo" => "http://purl.obolibrary.org/obo/",
29
+ "obo" => "http://purl.obolibrary.org/obo/",
30
30
  "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
31
31
  "rdfs" => "http://www.w3.org/2000/01/rdf-schema#",
32
32
  "dcterms" => "http://purl.org/dc/terms/",
@@ -34,11 +34,13 @@ module Bio
34
34
  "foaf" => "http://xmlns.com/foaf/0.1/",
35
35
  "sos" => "http://purl.jp/bio/01/quanto/ontology/sos#",
36
36
  "quanto" => "http://purl.jp/bio/01/quanto/resource/",
37
+ "sio" => "http://semanticscience.org/resource/",
38
+ "xsd" => "http://www.w3.org/2001/XMLSchema#",
37
39
  }
38
40
  end
39
41
 
40
42
  def json_ld_object
41
- object = [object_core, static_value_modules].flatten.inject(&:merge)
43
+ object = [object_core, static_value_modules, object_modules].flatten.inject(&:merge)
42
44
  if !@tiny
43
45
  object["hasMatrix"] = matrix_modules
44
46
  end
@@ -54,7 +56,7 @@ module Bio
54
56
  end
55
57
 
56
58
  def identifier_literal
57
- @id ? @id : "QNT" + @fastqc_object[:filename].split(".")[0]
59
+ @id ? @id : "QNT_" + @fastqc_object[:filename].split(".")[0]
58
60
  end
59
61
 
60
62
  def identifier_uri
@@ -91,21 +93,27 @@ module Bio
91
93
  filename,
92
94
  file_type,
93
95
  encoding,
94
- total_sequences,
95
- filtered_sequences,
96
- #sequence_length,
97
- percent_gc,
98
- total_duplicate_percentage,
99
- min_length,
100
- max_length,
101
- overall_mean_quality_score,
102
- overall_median_quality_score,
103
- overall_n_content,
104
- mean_sequence_length,
105
- median_sequence_length,
106
96
  ]
107
97
  end
108
98
 
99
+ def object_modules
100
+ {
101
+ "sio:SIO_000216" => [
102
+ total_sequences,
103
+ filtered_sequences,
104
+ percent_gc,
105
+ #total_duplicate_percentage,
106
+ min_length,
107
+ max_length,
108
+ overall_mean_quality_score,
109
+ overall_median_quality_score,
110
+ overall_n_content,
111
+ mean_sequence_length,
112
+ median_sequence_length,
113
+ ]
114
+ }
115
+ end
116
+
109
117
  def matrix_modules
110
118
  [
111
119
  per_base_sequence_quality,
@@ -157,40 +165,44 @@ module Bio
157
165
 
158
166
  def total_sequences
159
167
  {
160
- "totalSequences" => {
161
- "@type" => "SequenceReadAmount",
162
- "hasUnit" => "uo:UO_0000244",
163
- "rdf:value" => @fastqc_object[:total_sequences],
164
- }
168
+ "@type" => "totalSequences",
169
+ "sio:SIO_000221" => "obo:UO_0000244",
170
+ "sio:SIO_000300" => {
171
+ "@value" => @fastqc_object[:total_sequences],
172
+ "@type" => "xsd:integer",
173
+ },
165
174
  }
166
175
  end
167
176
 
168
177
  def filtered_sequences
169
178
  {
170
- "filteredSequences" => {
171
- "@type" => "SequenceReadAmount",
172
- "hasUnit" => "uo:UO_0000244",
173
- "rdf:value" => @fastqc_object[:filtered_sequences],
179
+ "@type" => "filteredSequences",
180
+ "sio:SIO_000221" => "obo:UO_0000244",
181
+ "sio:SIO_000300" => {
182
+ "@value" => @fastqc_object[:filtered_sequences],
183
+ "@type" => "xsd:integer",
174
184
  }
175
185
  }
176
186
  end
177
187
 
178
188
  def sequence_length
179
189
  {
180
- "sequenceLength" => {
181
- "@type" => "SequenceReadLength",
182
- "hasUnit" => "uo:UO_0000244",
183
- "rdf:value" => @fastqc_object[:sequence_length],
190
+ "@type" => "SequenceReadLength",
191
+ "sio:SIO_000221" => "obo:UO_0000244",
192
+ "sio:SIO_000300" => {
193
+ "@value" => @fastqc_object[:sequence_length],
194
+ "@type" => "xsd:string",
184
195
  }
185
196
  }
186
197
  end
187
198
 
188
199
  def percent_gc
189
200
  {
190
- "percentGC" => {
191
- "@type" => "BaseRatio",
192
- "hasUnit" => "uo:UO_0000187",
193
- "rdf:value" => @fastqc_object[:percent_gc],
201
+ "@type" => "percentGC",
202
+ "sio:SIO_000221" => "obo:UO_0000187",
203
+ "sio:SIO_000300" => {
204
+ "@value" => @fastqc_object[:percent_gc],
205
+ "@type" => "xsd:decimal",
194
206
  }
195
207
  }
196
208
  end
@@ -221,33 +233,33 @@ module Bio
221
233
  "basePosition" => base,
222
234
  "meanBaseCallQuality" => {
223
235
  "@type" => "PhredQualityScore",
224
- "hasUnit" => "uo:UO_0000189",
225
- "rdf:value" => mean,
236
+ "sio:SIO_000221" => "obo:UO_0000189",
237
+ "sio:SIO_000300" => mean,
226
238
  },
227
239
  "medianBaseCallQuality" => {
228
240
  "@type" => "PhredQualityScore",
229
- "hasUnit" => "uo:UO_0000189",
230
- "rdf:value" => median,
241
+ "sio:SIO_000221" => "obo:UO_0000189",
242
+ "sio:SIO_000300" => median,
231
243
  },
232
244
  "baseCallQualityLowerQuartile" => {
233
245
  "@type" => "PhredQualityScore",
234
- "hasUnit" => "uo:UO_0000189",
235
- "rdf:value" => lower_quartile,
246
+ "sio:SIO_000221" => "obo:UO_0000189",
247
+ "sio:SIO_000300" => lower_quartile,
236
248
  },
237
249
  "baseCallQualityUpperQuartile" => {
238
250
  "@type" => "PhredQualityScore",
239
- "hasUnit" => "uo:UO_0000189",
240
- "rdf:value" => upper_quartile,
251
+ "sio:SIO_000221" => "obo:UO_0000189",
252
+ "sio:SIO_000300" => upper_quartile,
241
253
  },
242
254
  "baseCallQuality10thPercentile" => {
243
255
  "@type" => "PhredQualityScore",
244
- "hasUnit" => "uo:UO_0000189",
245
- "rdf:value" => tenth_percentile,
256
+ "sio:SIO_000221" => "obo:UO_0000189",
257
+ "sio:SIO_000300" => tenth_percentile,
246
258
  },
247
259
  "baseCallQuality90thPercentile" => {
248
260
  "@type" => "PhredQualityScore",
249
- "hasUnit" => "uo:UO_0000189",
250
- "rdf:value" => ninetieth_percentile,
261
+ "sio:SIO_000221" => "obo:UO_0000189",
262
+ "sio:SIO_000300" => ninetieth_percentile,
251
263
  },
252
264
  }
253
265
  end
@@ -273,13 +285,13 @@ module Bio
273
285
  "rowIndex" => i,
274
286
  "baseCallQuality" => {
275
287
  "@type" => "PhredQualityScore",
276
- "hasUnit" => "uo:UO_0000189",
277
- "rdf:value" => quality,
288
+ "sio:SIO_000221" => "obo:UO_0000189",
289
+ "sio:SIO_000300" => quality,
278
290
  },
279
291
  "sequenceReadCount" => {
280
292
  "@type" => "SequenceReadAmount",
281
- "hasUnit" => "uo:UO_0000244",
282
- "rdf:value" => count,
293
+ "sio:SIO_000221" => "obo:UO_0000244",
294
+ "sio:SIO_000300" => count,
283
295
  },
284
296
  }
285
297
  end
@@ -308,23 +320,23 @@ module Bio
308
320
  "basePosition" => base,
309
321
  "percentGuanine" => {
310
322
  "@type" => "BaseRatio",
311
- "hasUnit" => "uo:UO_0000187",
312
- "rdf:value" => guanine,
323
+ "sio:SIO_000221" => "obo:UO_0000187",
324
+ "sio:SIO_000300" => guanine,
313
325
  },
314
326
  "percentAdenine" => {
315
327
  "@type" => "BaseRatio",
316
- "hasUnit" => "uo:UO_0000187",
317
- "rdf:value" => adenine,
328
+ "sio:SIO_000221" => "obo:UO_0000187",
329
+ "sio:SIO_000300" => adenine,
318
330
  },
319
331
  "percentThymine" => {
320
332
  "@type" => "BaseRatio",
321
- "hasUnit" => "uo:UO_0000187",
322
- "rdf:value" => thymine,
333
+ "sio:SIO_000221" => "obo:UO_0000187",
334
+ "sio:SIO_000300" => thymine,
323
335
  },
324
336
  "percentCytosine" => {
325
337
  "@type" => "BaseRatio",
326
- "hasUnit" => "uo:UO_0000187",
327
- "rdf:value" => chytosine,
338
+ "sio:SIO_000221" => "obo:UO_0000187",
339
+ "sio:SIO_000300" => chytosine,
328
340
  },
329
341
  }
330
342
  end
@@ -346,13 +358,13 @@ module Bio
346
358
  "rowIndex" => i,
347
359
  "percentGC" => {
348
360
  "@type" => "BaseRatio",
349
- "hasunit" => "uo:UO_0000187",
350
- "rdf:value" => gc_content,
361
+ "sio:SIO_000221" => "obo:UO_0000187",
362
+ "sio:SIO_000300" => gc_content,
351
363
  },
352
364
  "sequenceReadCount" => {
353
365
  "@type" => "SequenceReadAmount",
354
- "hasUnit" => "uo:UO_0000244",
355
- "rdf:value" => count,
366
+ "sio:SIO_000221" => "obo:UO_0000244",
367
+ "sio:SIO_000300" => count,
356
368
  },
357
369
  }
358
370
  end
@@ -378,8 +390,8 @@ module Bio
378
390
  "basePosition" => base,
379
391
  "nCount" => {
380
392
  "@type" => "BaseRatio",
381
- "hasUnit" => "uo:UO_0000187",
382
- "rdf:value" => n_count,
393
+ "sio:SIO_000221" => "obo:UO_0000187",
394
+ "sio:SIO_000300" => n_count,
383
395
  },
384
396
  }
385
397
  end
@@ -402,13 +414,13 @@ module Bio
402
414
 
403
415
  "sequenceReadLength" => {
404
416
  "@type" => "SequenceReadLength",
405
- "hasUnit" => "uo:UO_0000244",
406
- "rdf:value" => length,
417
+ "sio:SIO_000221" => "obo:UO_0000244",
418
+ "sio:SIO_000300" => length,
407
419
  },
408
420
  "sequenceReadCount" => {
409
421
  "@type" => "SequenceReadAmount",
410
- "hasUnit" => "uo:UO_0000244",
411
- "rdf:value" => count,
422
+ "sio:SIO_000221" => "obo:UO_0000244",
423
+ "sio:SIO_000300" => count,
412
424
  },
413
425
  }
414
426
  end
@@ -435,13 +447,13 @@ module Bio
435
447
 
436
448
  "sequenceDuplicationLevel" => {
437
449
  "@type" => "SequenceDuplicationLevel",
438
- "hasUnit" => "uo:UO_0000189",
439
- "rdf:value" => duplication_level,
450
+ "sio:SIO_000221" => "obo:UO_0000189",
451
+ "sio:SIO_000300" => duplication_level,
440
452
  },
441
453
  "sequenceReadRelativeCount" => {
442
454
  "@type" => "SequenceReadAmount",
443
- "hasUnit" => "uo:UO_0000244",
444
- "rdf:value" => relative_count,
455
+ "sio:SIO_000221" => "obo:UO_0000244",
456
+ "sio:SIO_000300" => relative_count,
445
457
  },
446
458
  }
447
459
  end
@@ -466,13 +478,13 @@ module Bio
466
478
  "overrepresentedSequence" => sequence,
467
479
  "sequenceReadCount" => {
468
480
  "@type" => "SequenceReadAmount",
469
- "hasUnit" => "uo:UO_0000244",
470
- "rdf:value" => count,
481
+ "sio:SIO_000221" => "obo:UO_0000244",
482
+ "sio:SIO_000300" => count,
471
483
  },
472
484
  "sequenceReadPercentage" => {
473
485
  "@type" => "SequenceReadRatio",
474
- "hasUnit" => "uo:UO_0000187",
475
- "rdf:value" => percentage,
486
+ "sio:SIO_000221" => "obo:UO_0000187",
487
+ "sio:SIO_000300" => percentage,
476
488
  },
477
489
  "possibleSourceOfSequence" => possible_source,
478
490
  }
@@ -503,18 +515,18 @@ module Bio
503
515
  "kmerSequence" => sequence,
504
516
  "sequenceReadCount" => {
505
517
  "@type" => "SequenceReadAmount",
506
- "hasUnit" => "uo:UO_0000244",
507
- "rdf:value" => count,
518
+ "sio:SIO_000221" => "obo:UO_0000244",
519
+ "sio:SIO_000300" => count,
508
520
  },
509
521
  "observedPerExpectedOverall" => {
510
522
  "@type" => "SequenceReadAmount",
511
- "hasUnit" => "uo:Ratio",
512
- "rdf:value" => ratio_overall,
523
+ "sio:SIO_000221" => "obo:Ratio",
524
+ "sio:SIO_000300" => ratio_overall,
513
525
  },
514
526
  "observedPerExpectedMax" => {
515
527
  "@type" => "SequenceReadAmount",
516
- "hasUnit" => "uo:Ratio",
517
- "rdf:value" => ratio_max,
528
+ "sio:SIO_000221" => "obo:Ratio",
529
+ "sio:SIO_000300" => ratio_max,
518
530
  },
519
531
  "observedPerExpectedMaxPosition" => ratio_max_position,
520
532
  }
@@ -523,71 +535,78 @@ module Bio
523
535
 
524
536
  def min_length
525
537
  {
526
- "minSequenceLength" => {
527
- "@type" => "SequenceReadLength",
528
- "hasUnit" => "uo:UO_0000244",
529
- "rdf:value" => @fastqc_object[:min_length],
530
- }
538
+ "@type" => "minimumSequenceLength",
539
+ "sio:SIO_000221" => "obo:UO_0000244",
540
+ "sio:SIO_000300" => {
541
+ "@value" => @fastqc_object[:min_length],
542
+ "@type" => "xsd:integer",
543
+ },
531
544
  }
532
545
  end
533
546
 
534
547
  def max_length
535
548
  {
536
- "maxSequenceLength" => {
537
- "@type" => "SequenceReadLength",
538
- "hasUnit" => "uo:UO_0000244",
539
- "rdf:value" => @fastqc_object[:max_length],
540
- }
549
+ "@type" => "maxSequenceLength",
550
+ "sio:SIO_000221" => "obo:UO_0000244",
551
+ "sio:SIO_000300" => {
552
+ "@value" => @fastqc_object[:max_length],
553
+ "@type" => "xsd:integer",
554
+ },
541
555
  }
542
556
  end
543
557
 
544
558
  def mean_sequence_length
545
559
  {
546
- "meanSequenceLength" => {
547
- "@type" => "SequenceReadLength",
548
- "hasUnit" => "uo:UO_0000244",
549
- "rdf:value" => @fastqc_object[:mean_sequence_length],
550
- }
560
+ "@type" => "meanSequenceLength",
561
+ "sio:SIO_000221" => "obo:UO_0000244",
562
+ "sio:SIO_000300" => {
563
+ "@value" => @fastqc_object[:mean_sequence_length],
564
+ "@type" => "xsd:decimal",
565
+ },
551
566
  }
552
567
  end
553
568
 
554
569
  def median_sequence_length
555
570
  {
556
- "medianSequenceLength" => {
557
- "@type" => "SequenceReadLength",
558
- "hasUnit" => "uo:UO_0000244",
559
- "rdf:value" => @fastqc_object[:median_sequence_length],
560
- }
571
+ "@type" => "medianSequenceLength",
572
+ "sio:SIO_000221" => "obo:UO_0000244",
573
+ "sio:SIO_000300" => {
574
+ "@value" => @fastqc_object[:median_sequence_length],
575
+ "@type" => "xsd:decimal",
576
+ },
561
577
  }
562
578
  end
563
579
 
564
580
  def overall_mean_quality_score
565
581
  {
566
- "overallMeanBaseCallQuality" => {
567
- "@type" => "PhredQualityScore",
568
- "hasUnit" => "uo:UO_0000189",
569
- "rdf:value" => @fastqc_object[:overall_mean_quality_score],
570
- }
582
+ "@type" => "meanBaseCallQuality",
583
+ "sio:SIO_000221" => "obo:UO_0000189",
584
+ "sio:SIO_000300" => {
585
+ "@value" => @fastqc_object[:overall_mean_quality_score],
586
+ "@type" => "xsd:decimal",
587
+ },
571
588
  }
572
589
  end
573
590
 
574
591
  def overall_median_quality_score
575
592
  {
576
- "overallMedianBaseCallQuality" => {
577
- "@type" => "PhredQualityScore",
578
- "hasUnit" => "uo:UO_0000189",
579
- "rdf:value" => @fastqc_object[:overall_median_quality_score],
580
- }
593
+ "@type" => "medianBaseCallQuality",
594
+ "sio:SIO_000221" => "obo:UO_0000189",
595
+ "sio:SIO_000300" => {
596
+ "@value" => @fastqc_object[:overall_median_quality_score],
597
+ "@type" => "xsd:decimal",
598
+ },
581
599
  }
582
600
  end
583
601
 
584
602
  def overall_n_content
585
603
  {
586
- "overallNContent" => {
587
- "@type" => "BaseRatio",
588
- "hasUnit" => "uo:UO_0000187",
589
- "rdf:value" => @fastqc_object[:overall_n_content],
590
- }
604
+ "@type" => "nContent",
605
+ "sio:SIO_000221" => "obo:UO_0000187",
606
+ "sio:SIO_000300" => {
607
+ "@value" => @fastqc_object[:overall_n_content],
608
+ "@type" => "xsd:decimal",
609
+ },
591
610
  }
592
611
  end
593
612
 
@@ -682,6 +701,16 @@ module Bio
682
701
  "SequenceReadRatio",
683
702
  "SequenceReadLength",
684
703
  "SequenceDuplicationLevel",
704
+ "nContent",
705
+ "percentGC",
706
+ "medianBaseCallQuality",
707
+ "meanBaseCallQuality",
708
+ "totalSequences",
709
+ "filteredSequences",
710
+ "minimumSequenceLength",
711
+ "maxSequenceLength",
712
+ "meanSequenceLength",
713
+ "medianSequenceLength",
685
714
  ]
686
715
  end
687
716
 
@@ -715,7 +744,6 @@ module Bio
715
744
  "sequenceReadLength",
716
745
  "sequenceReadPercentage",
717
746
  "sequenceReadRelativeCount",
718
- "hasUnit",
719
747
  "overallMeanBaseCallQuality",
720
748
  "overallMedianBaseCallQuality",
721
749
  "overallNContent",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-fastqc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tazro Inutano Ohta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-29 00:00:00.000000000 Z
11
+ date: 2016-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip