bio-fastqc 0.2.1 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47531c30911b727ad8b3bdc50e26ca8a3a6467e1
4
- data.tar.gz: 68bfdb57eac8779b634abcce1bed1cd40986a602
3
+ metadata.gz: 7e21d6a0e0f6ec91c058b8a574e79e6fa9dad431
4
+ data.tar.gz: 93ed09413b3fdb49564c43e68a49bed3d1937d88
5
5
  SHA512:
6
- metadata.gz: b5b9b66d01c11669db71c96eb3c7e87c74e7ab45b55605481b6d75b14f5c06b7bb2813dff5d20833068d17bf7246aef663356523b46278621b863675ebc9186e
7
- data.tar.gz: e81822150a02e8e7634e0a5942fe0806297c5a86211dc2bb5609d2504b47d64e77c4e7122526069037e6af1166857ba1934444fb51aa4e241690f598e9afc051
6
+ metadata.gz: 48097e944bd5c7e76a804f7d0623f869dddb17fceb4da878878489f2ad745dab9b0c925cadccce12bee332ff3b3c8db5dea1ce22085acf1be914bddec2f04998
7
+ data.tar.gz: fc4ac83bb59b47e5bf554884efbdc7e7f179fece8bd94fa70529fe7858727cdf6826ec87ac6081c68d91c3c82f0c908ab46a8798f9b180d9974839aa02daa77c
data/Gemfile CHANGED
@@ -5,6 +5,8 @@ source "http://rubygems.org"
5
5
 
6
6
  gem 'rubyzip', '~> 1.1', '>= 1.1.0'
7
7
  gem 'thor', "~> 0.19.1"
8
+ gem 'json-ld', '~> 1.99'
9
+ gem 'rdf-turtle', '~> 1.99'
8
10
 
9
11
  # Add dependencies to develop your gem here.
10
12
  # Include everything needed to run rake, tests, features, etc.
@@ -19,5 +21,5 @@ group :development do
19
21
  gem 'simplecov', '~> 0.10'
20
22
 
21
23
  gem 'pry', '~> 0.10'
22
- gem 'bio-fastqc'
24
+ gem 'bio-fastqc', '0.3.0'
23
25
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.3.1
@@ -11,4 +11,6 @@
11
11
  require 'bio/fastqc/data'
12
12
  require 'bio/fastqc/cli'
13
13
  require 'bio/fastqc/parser'
14
+ require 'bio/fastqc/converter'
15
+ require 'bio/fastqc/semantics'
14
16
  require 'bio/fastqc'
@@ -6,13 +6,14 @@ require 'json'
6
6
  module Bio
7
7
  module FastQC
8
8
  class CLI < Thor
9
- desc "parse [filename]...", "parse fastqc data in fastqc directory or zipfile, output in json format"
10
- def parse(*files)
11
- files.each do |file|
12
- puts JSON.dump(Parser.new(Data.read(file)).summary)
13
- end
14
- rescue
15
- puts "Wrong input file type: specify fastqc result data, directory or zipfile"
9
+ desc "parse [--format format] [filename]", "parse fastqc data in fastqc directory or zipfile, output in json, json-ld, or rdf-turtle format."
10
+ option :format, :default => "json"
11
+ def parse(file)
12
+ data = Data.read(file)
13
+ summary = Parser.new(data).summary
14
+ puts Converter.new(summary).convert_to(options[:format])
15
+ # rescue
16
+ # puts "Wrong input file type: specify fastqc result data, directory or zipfile"
16
17
  end
17
18
  end
18
19
  end
@@ -0,0 +1,35 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
require 'json'

module Bio
  module FastQC
    # Serialises a parsed FastQC summary into one of the supported output
    # formats (plain JSON, JSON-LD or Turtle).
    class Converter
      # Maps every accepted format name to the method that renders it.
      # "rdf-turtle" is accepted as an alias of "turtle".
      FORMATS = {
        "json" => :to_json,
        "json-ld" => :to_jsonld,
        "turtle" => :to_turtle,
        "rdf-turtle" => :to_turtle,
      }.freeze

      # @param summary_json [Hash] summary object as produced by the parser
      def initialize(summary_json)
        @summary_json = summary_json
      end

      # Renders the summary in the requested format.
      #
      # @param format [String, Symbol] one of the keys of FORMATS
      #   (matched case-insensitively)
      # @return [String] the serialised document
      # @raise [ArgumentError] when the format is not supported; previously an
      #   unknown format silently produced nil
      def convert_to(format)
        renderer = FORMATS[format.to_s.downcase]
        unless renderer
          raise ArgumentError,
                "Unknown format: #{format.inspect} (supported: #{FORMATS.keys.join(', ')})"
        end
        send(renderer)
      end

      # @return [String] the summary dumped as plain JSON
      #
      # Extra arguments are accepted and ignored so that JSON generators,
      # which call #to_json with a state argument, do not fail on this object.
      def to_json(*_args)
        JSON.dump(@summary_json)
      end

      # @return [String] the JSON-LD object built by Semantics, dumped as JSON
      def to_jsonld
        json_ld_object = Semantics.new(@summary_json).json_ld_object
        JSON.dump(json_ld_object)
      end

      # @return [String] the Turtle serialisation produced by Semantics#turtle
      def to_turtle
        Semantics.new(@summary_json).turtle
      end
    end
  end
end
@@ -220,12 +220,13 @@ module Bio
220
220
  percent_gc: self.percent_gc,
221
221
  per_base_sequence_quality: self.per_base_sequence_quality,
222
222
  per_tile_sequence_quality: self.per_tile_sequence_quality,
223
- per_sequnce_quality_scores: self.per_sequence_quality_scores,
223
+ per_sequence_quality_scores: self.per_sequence_quality_scores,
224
224
  per_base_sequence_content: self.per_base_sequence_content,
225
225
  per_sequence_gc_content: self.per_sequence_gc_content,
226
226
  per_base_n_content: self.per_base_n_content,
227
227
  sequence_length_distribution: self.sequence_length_distribution,
228
- total_duplicate_percentage: self.total_duplicate_percentage, sequence_duplication_levels: self.sequence_duplication_levels,
228
+ total_duplicate_percentage: self.total_duplicate_percentage,
229
+ sequence_duplication_levels: self.sequence_duplication_levels,
229
230
  overrepresented_sequences: self.overrepresented_sequences,
230
231
  adapter_content: self.adapter_content,
231
232
  kmer_content: self.kmer_content,
@@ -0,0 +1,715 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'json/ld'
4
+ require 'rdf/turtle'
5
+
6
module Bio
  module FastQC
    # Builds a JSON-LD object (and, through it, a Turtle document) from a
    # parsed FastQC summary. The summary is read as a Hash with symbol keys
    # such as :filename, :total_sequences or :per_base_sequence_quality.
    class Semantics
      # Prefix of the IRI used as the "@id" of a report.
      DATA_IRI_PREFIX = "http://me.com/data/QNT".freeze
      # Namespace under which all local ontology terms are defined.
      ONTOLOGY_IRI = "http://me.com/sos#".freeze
      # XML Schema datatype namespace used for literal coercion.
      XSD_IRI = "http://www.w3.org/2001/XMLSchema#".freeze

      # @param summary_json [Hash] summary object as produced by the parser
      def initialize(summary_json)
        @summary = summary_json
      end

      # @return [String] the report serialised as Turtle
      def turtle
        graph = RDF::Graph.new << JSON::LD::API.toRdf(json_ld_object)
        graph.dump(:ttl, prefixes: turtle_prefixes)
      end

      # @return [Hash] prefix => namespace pairs used by the Turtle writer;
      #   the same pairs that are imported into the JSON-LD context
      def turtle_prefixes
        imported_keywords
      end

      # @return [Hash] the complete JSON-LD document: context, identifier,
      #   single-value statistics and the list of matrix modules
      def json_ld_object
        object = static_value_modules.inject(object_core) { |acc, part| acc.merge(part) }
        object["hasMatrix"] = matrix_modules
        object
      end

      # @return [String] report IRI built from the part of the filename that
      #   precedes the first "."; a missing filename yields the bare prefix
      #   instead of raising
      def identifier
        DATA_IRI_PREFIX + @summary[:filename].to_s.split(".").first.to_s
      end

      def object_core
        {
          "@context" => jsonld_context,
          "@id" => identifier,
        }
      end

      # @return [Array<Hash>] one-entry (or empty) hashes merged into the
      #   top level of the JSON-LD document
      def static_value_modules
        [
          fastqc_version,
          filename,
          file_type,
          encoding,
          total_sequences,
          filtered_sequences,
          sequence_length,
          percent_gc,
          total_duplicate_percentage,
          min_length,
          max_length,
          overall_mean_quality_score,
          overall_median_quality_score,
          overall_n_content,
          mean_sequence_length,
          median_sequence_length,
        ]
      end

      # @return [Array<Hash>] nodes stored under "hasMatrix"
      def matrix_modules
        [
          per_base_sequence_quality,
          per_tile_sequence_quality,
          per_sequence_quality_scores,
          per_base_sequence_content,
          per_sequence_gc_content,
          per_base_n_content,
          sequence_length_distribution,
          sequence_duplication_levels,
          overrepresented_sequences,
          adapter_content,
          kmer_content,
        ]
      end

      # @param base [Object] base position label of a row
      # @return [String] "BaseRangeStatistics" when the label contains "-"
      #   (a range like "50-100"), "ExactBaseStatistics" otherwise
      def base_stat_class(base)
        case base
        when /-/
          "BaseRangeStatistics"
        else
          "ExactBaseStatistics"
        end
      end

      # Not exported yet; contributes nothing to the document.
      def fastqc_version
        {}
      end

      def filename
        { "filename" => @summary[:filename] }
      end

      def file_type
        { "fileType" => @summary[:file_type] }
      end

      def encoding
        { "encoding" => @summary[:encoding] }
      end

      def total_sequences
        { "totalSequences" => read_count(@summary[:total_sequences]) }
      end

      def filtered_sequences
        { "filteredSequences" => read_count(@summary[:filtered_sequences]) }
      end

      def sequence_length
        { "sequenceLength" => read_length(@summary[:sequence_length]) }
      end

      def percent_gc
        # NOTE(review): the unit is "uo:CountUnit" here while the per-row
        # percentGC values use "uo:Percentage" -- confirm the intended unit.
        {
          "percentGC" => measurement("NucleotideBaseContent", "uo:CountUnit", @summary[:percent_gc]),
        }
      end

      def per_base_sequence_quality
        matrix_node("PerBaseSequenceQuality",
                    per_base_sequence_quality_rows(@summary[:per_base_sequence_quality]))
      end

      # @param matrix [Array<Array>, nil] rows of
      #   [base, mean, median, lower quartile, upper quartile, 10th pct, 90th pct]
      def per_base_sequence_quality_rows(matrix)
        each_row(matrix) do |row, index|
          base_row(row[0], index).merge(
            "meanBaseCallQuality" => phred_score(row[1]),
            "medianBaseCallQuality" => phred_score(row[2]),
            "baseCallQualityLowerQuartile" => phred_score(row[3]),
            "baseCallQualityUpperQuartile" => phred_score(row[4]),
            "baseCallQuality10thPercentile" => phred_score(row[5]),
            "baseCallQuality90thPercentile" => phred_score(row[6])
          )
        end
      end

      # Not exported yet; yields an empty node.
      def per_tile_sequence_quality
        {}
      end

      def per_sequence_quality_scores
        # The class term was previously misspelled "PerSequnceQualityScores".
        matrix_node("PerSequenceQualityScores",
                    per_sequence_quality_scores_rows(@summary[:per_sequence_quality_scores]))
      end

      # @param matrix [Array<Array>, nil] rows of [quality, count]
      def per_sequence_quality_scores_rows(matrix)
        each_row(matrix) do |row, index|
          plain_row(index).merge(
            "baseCallQuality" => phred_score(row[0]),
            "sequenceReadCount" => read_count(row[1])
          )
        end
      end

      def per_base_sequence_content
        matrix_node("PerBaseSequenceContent",
                    per_base_sequence_content_rows(@summary[:per_base_sequence_content]))
      end

      # @param matrix [Array<Array>, nil] rows of
      #   [base, guanine, adenine, thymine, cytosine]
      def per_base_sequence_content_rows(matrix)
        each_row(matrix) do |row, index|
          base_row(row[0], index).merge(
            "percentGuanine" => base_percentage(row[1]),
            "percentAdenine" => base_percentage(row[2]),
            "percentThymine" => base_percentage(row[3]),
            "percentCytosine" => base_percentage(row[4])
          )
        end
      end

      def per_sequence_gc_content
        matrix_node("PerSequenceGCContent",
                    per_sequence_gc_content_rows(@summary[:per_sequence_gc_content]))
      end

      # @param matrix [Array<Array>, nil] rows of [gc content, count]
      def per_sequence_gc_content_rows(matrix)
        each_row(matrix) do |row, index|
          plain_row(index).merge(
            # Was written as "hasunit" => "uo:Percent": a key that is not
            # defined in the context and a unit used nowhere else.
            "percentGC" => base_percentage(row[0]),
            "sequenceReadCount" => read_count(row[1])
          )
        end
      end

      def per_base_n_content
        matrix_node("PerBaseNContent",
                    per_base_n_content_rows(@summary[:per_base_n_content]))
      end

      # @param matrix [Array<Array>, nil] rows of [base, n content]
      def per_base_n_content_rows(matrix)
        each_row(matrix) do |row, index|
          base_row(row[0], index).merge(
            "nCount" => n_percentage(row[1])
          )
        end
      end

      def sequence_length_distribution
        matrix_node("SequenceLengthDistribution",
                    sequence_length_distribution_rows(@summary[:sequence_length_distribution]))
      end

      # @param matrix [Array<Array>, nil] rows of [length, count]
      def sequence_length_distribution_rows(matrix)
        each_row(matrix) do |row, index|
          plain_row(index).merge(
            "sequenceReadLength" => read_length(row[0]),
            "sequenceReadCount" => read_count(row[1])
          )
        end
      end

      # Not exported yet; contributes nothing to the document.
      def total_duplicate_percentage
        {}
      end

      def sequence_duplication_levels
        matrix_node("SequenceDuplicationLevels",
                    sequence_duplication_levels_rows(@summary[:sequence_duplication_levels]))
      end

      # @param matrix [Array<Array>, nil] rows of [duplication level, relative count]
      def sequence_duplication_levels_rows(matrix)
        each_row(matrix) do |row, index|
          plain_row(index).merge(
            "sequenceDuplicationLevel" => measurement("SequenceDuplicationLevel", "uo:CountUnit", row[0]),
            "sequenceReadRelativeCount" => read_count(row[1])
          )
        end
      end

      def overrepresented_sequences
        matrix_node("OverrepresentedSequences",
                    overrepresented_sequences_rows(@summary[:overrepresented_sequences]))
      end

      # @param matrix [Array<Array>, nil] rows of
      #   [sequence, count, percentage, possible source]
      def overrepresented_sequences_rows(matrix)
        each_row(matrix) do |row, index|
          plain_row(index).merge(
            "overrepresentedSequence" => row[0],
            "sequenceReadCount" => read_count(row[1]),
            "sequenceReadPercentage" => measurement("SequenceReadContent", "uo:Percentage", row[2]),
            "possibleSourceOfSequence" => row[3]
          )
        end
      end

      # Not exported yet; yields an empty node.
      def adapter_content
        {}
      end

      def kmer_content
        matrix_node("KmerContent", kmer_content_rows(@summary[:kmer_content]))
      end

      # @param matrix [Array<Array>, nil] rows of
      #   [sequence, count, obs/exp overall, obs/exp max, max position]
      def kmer_content_rows(matrix)
        each_row(matrix) do |row, index|
          plain_row(index).merge(
            "kmerSequence" => row[0],
            "sequenceReadCount" => read_count(row[1]),
            "observedPerExpectedOverall" => measurement("SequenceReadContent", "uo:Ratio", row[2]),
            "observedPerExpectedMax" => measurement("SequenceReadContent", "uo:Ratio", row[3]),
            "observedPerExpectedMaxPosition" => row[4]
          )
        end
      end

      def min_length
        { "minSequenceLength" => read_length(@summary[:min_length]) }
      end

      def max_length
        { "maxSequenceLength" => read_length(@summary[:max_length]) }
      end

      def mean_sequence_length
        { "meanSequenceLength" => read_length(@summary[:mean_sequence_length]) }
      end

      def median_sequence_length
        { "medianSequenceLength" => read_length(@summary[:median_sequence_length]) }
      end

      def overall_mean_quality_score
        { "overallMeanBaseCallQuality" => phred_score(@summary[:overall_mean_quality_score]) }
      end

      def overall_median_quality_score
        { "overallMedianBaseCallQuality" => phred_score(@summary[:overall_median_quality_score]) }
      end

      def overall_n_content
        { "overallNContent" => n_percentage(@summary[:overall_n_content]) }
      end

      #
      # Generate JSON-LD context object
      #

      # @return [Hash] the "@context": imported prefixes followed by one term
      #   definition per local class and property
      def jsonld_context
        context = imported_keywords
        # Classes and object properties are coerced to IRIs.
        define_terms(context, sos_class, "@id")
        define_terms(context, sos_object_properties, "@id")
        # Data properties are coerced to typed literals.
        define_terms(context, sos_data_properties_string, XSD_IRI + "string")
        define_terms(context, sos_data_properties_integer, XSD_IRI + "integer")
        define_terms(context, sos_data_properties_float, XSD_IRI + "float")
        context
      end

      # @return [Hash] prefixes of the external vocabularies in use
      def imported_keywords
        {
          "uo" => "http://purl.obolibrary.org/obo/",
          "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        }
      end

      #
      # definition of classes
      #

      def sos_class
        [
          sos_class_general,
          sos_class_fastqc_modules,
          sos_class_for_values,
        ].flatten
      end

      def sos_class_general
        [
          "SequenceStatisticsReport",
          "SequenceStatisticsMatrix",
          "Row",
          "ExactBaseStatistics",
          "BaseRangeStatistics",
        ]
      end

      def sos_class_fastqc_modules
        [
          "PerBaseSequenceQuality",
          "PerTileSequenceQuality",
          "PerSequenceQualityScores",
          "PerBaseSequenceContent",
          "PerSequenceGCContent",
          "PerBaseNContent",
          "SequenceLengthDistribution",
          "SequenceDuplicationLevels",
          "OverrepresentedSequences",
          "KmerContent",
        ]
      end

      def sos_class_for_values
        [
          "PhredQualityScore",
          "NucleotideBaseContent",
          "SequenceReadContent",
          "SequenceReadLength",
          "SequenceDuplicationLevel",
          # Used as "@type" by the N-content values, so it needs a definition.
          "NContent",
        ]
      end

      #
      # definition of predicates
      #

      def sos_object_properties
        [
          "hasMatrix",
          "totalSequences",
          "filteredSequences",
          "sequenceLength",
          "percentGC",
          "hasRow",
          "basePosition",
          "kmerSequence",
          "meanBaseCallQuality",
          "medianBaseCallQuality",
          "nCount",
          "observedPerExpectedMax",
          "observedPerExpectedMaxPosition",
          "observedPerExpectedOverall",
          "percentAdenine",
          "percentCytosine",
          "percentGuanine",
          "percentThymine",
          "sequenceDuplicationLevel",
          "sequenceReadCount",
          "sequenceReadLength",
          "sequenceReadPercentage",
          "sequenceReadRelativeCount",
          "hasUnit",
          "overallMeanBaseCallQuality",
          "overallMedianBaseCallQuality",
          "overallNContent",
        ]
      end

      def sos_data_properties_string
        [
          "filename",
          "fileType",
          "encoding",
          "possibleSourceOfSequence",
          "overrepresentedSequence",
        ]
      end

      def sos_data_properties_integer
        [
          "rowIndex",
        ]
      end

      def sos_data_properties_float
        [
          "baseCallQuality",
          "baseCallQuality10thPercentile",
          "baseCallQuality90thPercentile",
          "baseCallQualityLowerQuartile",
          "baseCallQualityUpperQuartile",
          "minSequenceLength",
          "maxSequenceLength",
          "meanSequenceLength",
          "medianSequenceLength",
        ]
      end

      private

      # Value node shared by every statistic: a typed value with a unit.
      def measurement(type, unit, value)
        {
          "@type" => type,
          "hasUnit" => unit,
          "rdf:value" => value,
        }
      end

      def phred_score(value)
        measurement("PhredQualityScore", "uo:CountUnit", value)
      end

      def read_count(value)
        measurement("SequenceReadContent", "uo:CountUnit", value)
      end

      def read_length(value)
        measurement("SequenceReadLength", "uo:CountUnit", value)
      end

      def base_percentage(value)
        measurement("NucleotideBaseContent", "uo:Percentage", value)
      end

      def n_percentage(value)
        measurement("NContent", "uo:Percentage", value)
      end

      # Node describing one matrix module and its rows.
      def matrix_node(type, rows)
        {
          "@type" => type,
          "hasRow" => rows,
        }
      end

      # Leading keys of a row that is not tied to a base position.
      def plain_row(index)
        {
          "@type" => "Row",
          "rowIndex" => index,
        }
      end

      # Leading keys of a row that describes a base position or range.
      def base_row(base, index)
        {
          "@type" => [
            "Row",
            base_stat_class(base),
          ],
          "rowIndex" => index,
          "basePosition" => base,
        }
      end

      # Maps each row and its zero-based index through the block. A nil
      # matrix (module missing from the summary) is treated as having no rows.
      def each_row(matrix)
        Array(matrix).each_with_index.map { |row, index| yield(row, index) }
      end

      # Adds one term definition per name to the context hash.
      def define_terms(context, terms, type)
        terms.each do |term|
          context[term] = {
            "@id" => ONTOLOGY_IRI + term,
            "@type" => type,
          }
        end
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-fastqc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tazro Inutano Ohta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-11 00:00:00.000000000 Z
11
+ date: 2016-03-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -44,6 +44,34 @@ dependencies:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
46
  version: 0.19.1
47
+ - !ruby/object:Gem::Dependency
48
+ name: json-ld
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.99'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '1.99'
61
+ - !ruby/object:Gem::Dependency
62
+ name: rdf-turtle
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.99'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.99'
47
75
  - !ruby/object:Gem::Dependency
48
76
  name: bundler
49
77
  requirement: !ruby/object:Gem::Requirement
@@ -160,16 +188,16 @@ dependencies:
160
188
  name: bio-fastqc
161
189
  requirement: !ruby/object:Gem::Requirement
162
190
  requirements:
163
- - - ">="
191
+ - - '='
164
192
  - !ruby/object:Gem::Version
165
- version: '0'
193
+ version: 0.3.0
166
194
  type: :development
167
195
  prerelease: false
168
196
  version_requirements: !ruby/object:Gem::Requirement
169
197
  requirements:
170
- - - ">="
198
+ - - '='
171
199
  - !ruby/object:Gem::Version
172
- version: '0'
200
+ version: 0.3.0
173
201
  description: ruby parser for FastQC, a quality control software for high-throughput
174
202
  sequencing data.
175
203
  email: inutano@gmail.com
@@ -194,8 +222,10 @@ files:
194
222
  - lib/bio-fastqc.rb
195
223
  - lib/bio/fastqc.rb
196
224
  - lib/bio/fastqc/cli.rb
225
+ - lib/bio/fastqc/converter.rb
197
226
  - lib/bio/fastqc/data.rb
198
227
  - lib/bio/fastqc/parser.rb
228
+ - lib/bio/fastqc/semantics.rb
199
229
  - spec/bio-fastqc_spec.rb
200
230
  - spec/example_fastqc.zip
201
231
  - spec/spec_helper.rb