bio-fastqc 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47531c30911b727ad8b3bdc50e26ca8a3a6467e1
4
- data.tar.gz: 68bfdb57eac8779b634abcce1bed1cd40986a602
3
+ metadata.gz: 7e21d6a0e0f6ec91c058b8a574e79e6fa9dad431
4
+ data.tar.gz: 93ed09413b3fdb49564c43e68a49bed3d1937d88
5
5
  SHA512:
6
- metadata.gz: b5b9b66d01c11669db71c96eb3c7e87c74e7ab45b55605481b6d75b14f5c06b7bb2813dff5d20833068d17bf7246aef663356523b46278621b863675ebc9186e
7
- data.tar.gz: e81822150a02e8e7634e0a5942fe0806297c5a86211dc2bb5609d2504b47d64e77c4e7122526069037e6af1166857ba1934444fb51aa4e241690f598e9afc051
6
+ metadata.gz: 48097e944bd5c7e76a804f7d0623f869dddb17fceb4da878878489f2ad745dab9b0c925cadccce12bee332ff3b3c8db5dea1ce22085acf1be914bddec2f04998
7
+ data.tar.gz: fc4ac83bb59b47e5bf554884efbdc7e7f179fece8bd94fa70529fe7858727cdf6826ec87ac6081c68d91c3c82f0c908ab46a8798f9b180d9974839aa02daa77c
data/Gemfile CHANGED
@@ -5,6 +5,8 @@ source "http://rubygems.org"
5
5
 
6
6
  gem 'rubyzip', '~> 1.1', '>= 1.1.0'
7
7
  gem 'thor', "~> 0.19.1"
8
+ gem 'json-ld', '~> 1.99'
9
+ gem 'rdf-turtle', '~> 1.99'
8
10
 
9
11
  # Add dependencies to develop your gem here.
10
12
  # Include everything needed to run rake, tests, features, etc.
@@ -19,5 +21,5 @@ group :development do
19
21
  gem 'simplecov', '~> 0.10'
20
22
 
21
23
  gem 'pry', '~> 0.10'
22
- gem 'bio-fastqc'
24
+ gem 'bio-fastqc', '0.3.0'
23
25
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.3.1
@@ -11,4 +11,6 @@
11
11
  require 'bio/fastqc/data'
12
12
  require 'bio/fastqc/cli'
13
13
  require 'bio/fastqc/parser'
14
+ require 'bio/fastqc/converter'
15
+ require 'bio/fastqc/semantics'
14
16
  require 'bio/fastqc'
@@ -6,13 +6,14 @@ require 'json'
6
6
  module Bio
7
7
  module FastQC
8
8
  class CLI < Thor
9
- desc "parse [filename]...", "parse fastqc data in fastqc directory or zipfile, output in json format"
10
- def parse(*files)
11
- files.each do |file|
12
- puts JSON.dump(Parser.new(Data.read(file)).summary)
13
- end
14
- rescue
15
- puts "Wrong input file type: specify fastqc result data, directory or zipfile"
9
+ desc "parse [--format format] [filename]", "parse fastqc data in fastqc directory or zipfile, output in json, json-ld, or rdf-turtle format."
10
+ option :format, :default => "json"
11
+ def parse(file)
12
+ data = Data.read(file)
13
+ summary = Parser.new(data).summary
14
+ puts Converter.new(summary).convert_to(options[:format])
15
+ # rescue
16
+ # puts "Wrong input file type: specify fastqc result data, directory or zipfile"
16
17
  end
17
18
  end
18
19
  end
@@ -0,0 +1,35 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ module Bio
4
+ module FastQC
5
+ class Converter
6
+ def initialize(summary_json)
7
+ @summary_json = summary_json
8
+ end
9
+
10
+ def convert_to(format)
11
+ case format
12
+ when "json"
13
+ to_json
14
+ when "json-ld"
15
+ to_jsonld
16
+ when "turtle"
17
+ to_turtle
18
+ end
19
+ end
20
+
21
+ def to_json
22
+ JSON.dump(@summary_json)
23
+ end
24
+
25
+ def to_jsonld
26
+ json_ld_object = Semantics.new(@summary_json).json_ld_object
27
+ JSON.dump(json_ld_object)
28
+ end
29
+
30
+ def to_turtle
31
+ Semantics.new(@summary_json).turtle
32
+ end
33
+ end
34
+ end
35
+ end
@@ -220,12 +220,13 @@ module Bio
220
220
  percent_gc: self.percent_gc,
221
221
  per_base_sequence_quality: self.per_base_sequence_quality,
222
222
  per_tile_sequence_quality: self.per_tile_sequence_quality,
223
- per_sequnce_quality_scores: self.per_sequence_quality_scores,
223
+ per_sequence_quality_scores: self.per_sequence_quality_scores,
224
224
  per_base_sequence_content: self.per_base_sequence_content,
225
225
  per_sequence_gc_content: self.per_sequence_gc_content,
226
226
  per_base_n_content: self.per_base_n_content,
227
227
  sequence_length_distribution: self.sequence_length_distribution,
228
- total_duplicate_percentage: self.total_duplicate_percentage, sequence_duplication_levels: self.sequence_duplication_levels,
228
+ total_duplicate_percentage: self.total_duplicate_percentage,
229
+ sequence_duplication_levels: self.sequence_duplication_levels,
229
230
  overrepresented_sequences: self.overrepresented_sequences,
230
231
  adapter_content: self.adapter_content,
231
232
  kmer_content: self.kmer_content,
@@ -0,0 +1,715 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'json/ld'
4
+ require 'rdf/turtle'
5
+
6
+ module Bio
7
+ module FastQC
8
+ class Semantics
9
+ def initialize(summary_json)
10
+ @summary = summary_json
11
+ end
12
+
13
+ def turtle
14
+ object = json_ld_object
15
+ graph = RDF::Graph.new << JSON::LD::API.toRdf(object)
16
+ graph.dump(:ttl, prefixes: turtle_prefixes)
17
+ end
18
+
19
+ def turtle_prefixes
20
+ {
21
+ "uo" => "http://purl.obolibrary.org/obo/",
22
+ "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
23
+ }
24
+ end
25
+
26
+ def json_ld_object
27
+ object = [object_core, static_value_modules].flatten.inject(&:merge)
28
+ object["hasMatrix"] = matrix_modules
29
+ object
30
+ end
31
+
32
+ def identifier
33
+ "http://me.com/data/QNT" + @summary[:filename].split(".").first
34
+ end
35
+
36
+ def object_core
37
+ {
38
+ "@context" => jsonld_context,
39
+ "@id" => identifier,
40
+ }
41
+ end
42
+
43
+ def static_value_modules
44
+ [
45
+ fastqc_version,
46
+ filename,
47
+ file_type,
48
+ encoding,
49
+ total_sequences,
50
+ filtered_sequences,
51
+ sequence_length,
52
+ percent_gc,
53
+ total_duplicate_percentage,
54
+ min_length,
55
+ max_length,
56
+ overall_mean_quality_score,
57
+ overall_median_quality_score,
58
+ overall_n_content,
59
+ mean_sequence_length,
60
+ median_sequence_length,
61
+ ]
62
+ end
63
+
64
+ def matrix_modules
65
+ [
66
+ per_base_sequence_quality,
67
+ per_tile_sequence_quality,
68
+ per_sequence_quality_scores,
69
+ per_base_sequence_content,
70
+ per_sequence_gc_content,
71
+ per_base_n_content,
72
+ sequence_length_distribution,
73
+ sequence_duplication_levels,
74
+ overrepresented_sequences,
75
+ adapter_content,
76
+ kmer_content,
77
+ ]
78
+ end
79
+
80
+ def base_stat_class(base)
81
+ case base
82
+ when /-/ # when the base position is range like "50-100"
83
+ "BaseRangeStatistics"
84
+ else
85
+ "ExactBaseStatistics"
86
+ end
87
+ end
88
+
89
+ def fastqc_version
90
+ {}
91
+ end
92
+
93
+ def filename
94
+ {
95
+ "filename" => @summary[:filename],
96
+ }
97
+ end
98
+
99
+ def file_type
100
+ {
101
+ "fileType" => @summary[:file_type],
102
+ }
103
+ end
104
+
105
+ def encoding
106
+ {
107
+ "encoding" => @summary[:encoding],
108
+ }
109
+ end
110
+
111
+ def total_sequences
112
+ {
113
+ "totalSequences" => {
114
+ "@type" => "SequenceReadContent",
115
+ "hasUnit" => "uo:CountUnit",
116
+ "rdf:value" => @summary[:total_sequences],
117
+ }
118
+ }
119
+ end
120
+
121
+ def filtered_sequences
122
+ {
123
+ "filteredSequences" => {
124
+ "@type" => "SequenceReadContent",
125
+ "hasUnit" => "uo:CountUnit",
126
+ "rdf:value" => @summary[:filtered_sequences],
127
+ }
128
+ }
129
+ end
130
+
131
+ def sequence_length
132
+ {
133
+ "sequenceLength" => {
134
+ "@type" => "SequenceReadLength",
135
+ "hasUnit" => "uo:CountUnit",
136
+ "rdf:value" => @summary[:sequence_length],
137
+ }
138
+ }
139
+ end
140
+
141
+ def percent_gc
142
+ {
143
+ "percentGC" => {
144
+ "@type" => "NucleotideBaseContent",
145
+ "hasUnit" => "uo:CountUnit",
146
+ "rdf:value" => @summary[:percent_gc],
147
+ }
148
+ }
149
+ end
150
+
151
+ def per_base_sequence_quality
152
+ {
153
+ "@type" => "PerBaseSequenceQuality",
154
+ "hasRow" => per_base_sequence_quality_rows(@summary[:per_base_sequence_quality]),
155
+ }
156
+ end
157
+
158
+ def per_base_sequence_quality_rows(matrix)
159
+ matrix.map.with_index do |row, i|
160
+ base = row[0]
161
+ mean = row[1]
162
+ median = row[2]
163
+ lower_quartile = row[3]
164
+ upper_quartile = row[4]
165
+ tenth_percentile = row[5]
166
+ ninetieth_percentile = row[6]
167
+
168
+ {
169
+ "@type" => [
170
+ "Row",
171
+ base_stat_class(base),
172
+ ],
173
+ "rowIndex" => i,
174
+ "basePosition" => base,
175
+ "meanBaseCallQuality" => {
176
+ "@type" => "PhredQualityScore",
177
+ "hasUnit" => "uo:CountUnit",
178
+ "rdf:value" => mean,
179
+ },
180
+ "medianBaseCallQuality" => {
181
+ "@type" => "PhredQualityScore",
182
+ "hasUnit" => "uo:CountUnit",
183
+ "rdf:value" => median,
184
+ },
185
+ "baseCallQualityLowerQuartile" => {
186
+ "@type" => "PhredQualityScore",
187
+ "hasUnit" => "uo:CountUnit",
188
+ "rdf:value" => lower_quartile,
189
+ },
190
+ "baseCallQualityUpperQuartile" => {
191
+ "@type" => "PhredQualityScore",
192
+ "hasUnit" => "uo:CountUnit",
193
+ "rdf:value" => upper_quartile,
194
+ },
195
+ "baseCallQuality10thPercentile" => {
196
+ "@type" => "PhredQualityScore",
197
+ "hasUnit" => "uo:CountUnit",
198
+ "rdf:value" => tenth_percentile,
199
+ },
200
+ "baseCallQuality90thPercentile" => {
201
+ "@type" => "PhredQualityScore",
202
+ "hasUnit" => "uo:CountUnit",
203
+ "rdf:value" => ninetieth_percentile,
204
+ },
205
+ }
206
+ end
207
+ end
208
+
209
+ def per_tile_sequence_quality
210
+ {}
211
+ end
212
+
213
+ def per_sequence_quality_scores
214
+ {
215
+ "@type" => "PerSequnceQualityScores",
216
+ "hasRow" => per_sequence_quality_scores_rows(@summary[:per_sequence_quality_scores]),
217
+ }
218
+ end
219
+
220
+ def per_sequence_quality_scores_rows(matrix)
221
+ matrix.map.with_index do |row, i|
222
+ quality = row[0]
223
+ count = row[1]
224
+ {
225
+ "@type" => "Row",
226
+ "rowIndex" => i,
227
+ "baseCallQuality" => {
228
+ "@type" => "PhredQualityScore",
229
+ "hasUnit" => "uo:CountUnit",
230
+ "rdf:value" => quality,
231
+ },
232
+ "sequenceReadCount" => {
233
+ "@type" => "SequenceReadContent",
234
+ "hasUnit" => "uo:CountUnit",
235
+ "rdf:value" => count,
236
+ },
237
+ }
238
+ end
239
+ end
240
+
241
+ def per_base_sequence_content
242
+ {
243
+ "@type" => "PerBaseSequenceContent",
244
+ "hasRow" => per_base_sequence_content_rows(@summary[:per_base_sequence_content]),
245
+ }
246
+ end
247
+
248
+ def per_base_sequence_content_rows(matrix)
249
+ matrix.map.with_index do |row, i|
250
+ base = row[0]
251
+ guanine = row[1]
252
+ adenine = row[2]
253
+ thymine = row[3]
254
+ chytosine = row[4]
255
+ {
256
+ "@type" => [
257
+ "Row",
258
+ base_stat_class(base),
259
+ ],
260
+ "rowIndex" => i,
261
+ "basePosition" => base,
262
+ "percentGuanine" => {
263
+ "@type" => "NucleotideBaseContent",
264
+ "hasUnit" => "uo:Percentage",
265
+ "rdf:value" => guanine,
266
+ },
267
+ "percentAdenine" => {
268
+ "@type" => "NucleotideBaseContent",
269
+ "hasUnit" => "uo:Percentage",
270
+ "rdf:value" => adenine,
271
+ },
272
+ "percentThymine" => {
273
+ "@type" => "NucleotideBaseContent",
274
+ "hasUnit" => "uo:Percentage",
275
+ "rdf:value" => thymine,
276
+ },
277
+ "percentCytosine" => {
278
+ "@type" => "NucleotideBaseContent",
279
+ "hasUnit" => "uo:Percentage",
280
+ "rdf:value" => chytosine,
281
+ },
282
+ }
283
+ end
284
+ end
285
+
286
+ def per_sequence_gc_content
287
+ {
288
+ "@type" => "PerSequenceGCContent",
289
+ "hasRow" => per_sequence_gc_content_rows(@summary[:per_sequence_gc_content]),
290
+ }
291
+ end
292
+
293
+ def per_sequence_gc_content_rows(matrix)
294
+ matrix.map.with_index do |row, i|
295
+ gc_content = row[0]
296
+ count = row[1]
297
+ {
298
+ "@type" => "Row",
299
+ "rowIndex" => i,
300
+ "percentGC" => {
301
+ "@type" => "NucleotideBaseContent",
302
+ "hasunit" => "uo:Percent",
303
+ "rdf:value" => gc_content,
304
+ },
305
+ "sequenceReadCount" => {
306
+ "@type" => "SequenceReadContent",
307
+ "hasUnit" => "uo:CountUnit",
308
+ "rdf:value" => count,
309
+ },
310
+ }
311
+ end
312
+ end
313
+
314
+ def per_base_n_content
315
+ {
316
+ "@type" => "PerBaseNContent",
317
+ "hasRow" => per_base_n_content_rows(@summary[:per_base_n_content]),
318
+ }
319
+ end
320
+
321
+ def per_base_n_content_rows(matrix)
322
+ matrix.map.with_index do |row, i|
323
+ base = row[0]
324
+ n_count = row[1]
325
+ {
326
+ "@type" => [
327
+ "Row",
328
+ base_stat_class(base),
329
+ ],
330
+ "rowIndex" => i,
331
+ "basePosition" => base,
332
+ "nCount" => {
333
+ "@type" => "NContent",
334
+ "hasUnit" => "uo:Percentage",
335
+ "rdf:value" => n_count,
336
+ },
337
+ }
338
+ end
339
+ end
340
+
341
+ def sequence_length_distribution
342
+ {
343
+ "@type" => "SequenceLengthDistribution",
344
+ "hasRow" => sequence_length_distribution_rows(@summary[:sequence_length_distribution]),
345
+ }
346
+ end
347
+
348
+ def sequence_length_distribution_rows(matrix)
349
+ matrix.map.with_index do |row, i|
350
+ length = row[0]
351
+ count = row[1]
352
+ {
353
+ "@type" => "Row",
354
+ "rowIndex" => i,
355
+
356
+ "sequenceReadLength" => {
357
+ "@type" => "SequenceReadLength",
358
+ "hasUnit" => "uo:CountUnit",
359
+ "rdf:value" => length,
360
+ },
361
+ "sequenceReadCount" => {
362
+ "@type" => "SequenceReadContent",
363
+ "hasUnit" => "uo:CountUnit",
364
+ "rdf:value" => count,
365
+ },
366
+ }
367
+ end
368
+ end
369
+
370
+ def total_duplicate_percentage
371
+ {}
372
+ end
373
+
374
+ def sequence_duplication_levels
375
+ {
376
+ "@type" => "SequenceDuplicationLevels",
377
+ "hasRow" => sequence_duplication_levels_rows(@summary[:sequence_duplication_levels]),
378
+ }
379
+ end
380
+
381
+ def sequence_duplication_levels_rows(matrix)
382
+ matrix.map.with_index do |row, i|
383
+ duplication_level = row[0]
384
+ relative_count = row[1]
385
+ {
386
+ "@type" => "Row",
387
+ "rowIndex" => i,
388
+
389
+ "sequenceDuplicationLevel" => {
390
+ "@type" => "SequenceDuplicationLevel",
391
+ "hasUnit" => "uo:CountUnit",
392
+ "rdf:value" => duplication_level,
393
+ },
394
+ "sequenceReadRelativeCount" => {
395
+ "@type" => "SequenceReadContent",
396
+ "hasUnit" => "uo:CountUnit",
397
+ "rdf:value" => relative_count,
398
+ },
399
+ }
400
+ end
401
+ end
402
+
403
+ def overrepresented_sequences
404
+ {
405
+ "@type" => "OverrepresentedSequences",
406
+ "hasRow" => overrepresented_sequences_rows(@summary[:overrepresented_sequences]),
407
+ }
408
+ end
409
+
410
+ def overrepresented_sequences_rows(matrix)
411
+ matrix.map.with_index do |row, i|
412
+ sequence = row[0]
413
+ count = row[1]
414
+ percentage = row[2]
415
+ possible_source = row[3]
416
+ {
417
+ "@type" => "Row",
418
+ "rowIndex" => i,
419
+ "overrepresentedSequence" => sequence,
420
+ "sequenceReadCount" => {
421
+ "@type" => "SequenceReadContent",
422
+ "hasUnit" => "uo:CountUnit",
423
+ "rdf:value" => count,
424
+ },
425
+ "sequenceReadPercentage" => {
426
+ "@type" => "SequenceReadContent",
427
+ "hasUnit" => "uo:Percentage",
428
+ "rdf:value" => percentage,
429
+ },
430
+ "possibleSourceOfSequence" => possible_source,
431
+ }
432
+ end
433
+ end
434
+
435
+ def adapter_content
436
+ {}
437
+ end
438
+
439
+ def kmer_content
440
+ {
441
+ "@type" => "KmerContent",
442
+ "hasRow" => kmer_content_rows(@summary[:kmer_content]),
443
+ }
444
+ end
445
+
446
+ def kmer_content_rows(matrix)
447
+ matrix.map.with_index do |row, i|
448
+ sequence = row[0]
449
+ count = row[1]
450
+ ratio_overall = row[2]
451
+ ratio_max = row[3]
452
+ ratio_max_position = row[4]
453
+ {
454
+ "@type" => "Row",
455
+ "rowIndex" => i,
456
+ "kmerSequence" => sequence,
457
+ "sequenceReadCount" => {
458
+ "@type" => "SequenceReadContent",
459
+ "hasUnit" => "uo:CountUnit",
460
+ "rdf:value" => count,
461
+ },
462
+ "observedPerExpectedOverall" => {
463
+ "@type" => "SequenceReadContent",
464
+ "hasUnit" => "uo:Ratio",
465
+ "rdf:value" => ratio_overall,
466
+ },
467
+ "observedPerExpectedMax" => {
468
+ "@type" => "SequenceReadContent",
469
+ "hasUnit" => "uo:Ratio",
470
+ "rdf:value" => ratio_max,
471
+ },
472
+ "observedPerExpectedMaxPosition" => ratio_max_position,
473
+ }
474
+ end
475
+ end
476
+
477
+ def min_length
478
+ {
479
+ "minSequenceLength" => {
480
+ "@type" => "SequenceReadLength",
481
+ "hasUnit" => "uo:CountUnit",
482
+ "rdf:value" => @summary[:min_length],
483
+ }
484
+ }
485
+ end
486
+
487
+ def max_length
488
+ {
489
+ "maxSequenceLength" => {
490
+ "@type" => "SequenceReadLength",
491
+ "hasUnit" => "uo:CountUnit",
492
+ "rdf:value" => @summary[:max_length],
493
+ }
494
+ }
495
+ end
496
+
497
+ def mean_sequence_length
498
+ {
499
+ "meanSequenceLength" => {
500
+ "@type" => "SequenceReadLength",
501
+ "hasUnit" => "uo:CountUnit",
502
+ "rdf:value" => @summary[:mean_sequence_length],
503
+ }
504
+ }
505
+ end
506
+
507
+ def median_sequence_length
508
+ {
509
+ "medianSequenceLength" => {
510
+ "@type" => "SequenceReadLength",
511
+ "hasUnit" => "uo:CountUnit",
512
+ "rdf:value" => @summary[:median_sequence_length],
513
+ }
514
+ }
515
+ end
516
+
517
+ def overall_mean_quality_score
518
+ {
519
+ "overallMeanBaseCallQuality" => {
520
+ "@type" => "PhredQualityScore",
521
+ "hasUnit" => "uo:CountUnit",
522
+ "rdf:value" => @summary[:overall_mean_quality_score],
523
+ }
524
+ }
525
+ end
526
+
527
+ def overall_median_quality_score
528
+ {
529
+ "overallMedianBaseCallQuality" => {
530
+ "@type" => "PhredQualityScore",
531
+ "hasUnit" => "uo:CountUnit",
532
+ "rdf:value" => @summary[:overall_median_quality_score],
533
+ }
534
+ }
535
+ end
536
+
537
+ def overall_n_content
538
+ {
539
+ "overallNContent" => {
540
+ "@type" => "NContent",
541
+ "hasUnit" => "uo:Percentage",
542
+ "rdf:value" => @summary[:overall_n_content],
543
+ }
544
+ }
545
+ end
546
+
547
+ #
548
+ # Generate JSON-LD context object
549
+ #
550
+
551
+ def jsonld_context
552
+ # definition of imported terms in @context
553
+ object = imported_keywords
554
+
555
+ # definition of local ontology terms
556
+ domain = "http://me.com/sos#"
557
+
558
+ # definition of class in @context
559
+ sos_class.each do |term|
560
+ object[term] = {}
561
+ object[term]["@id"] = domain + term
562
+ object[term]["@type"] = "@id"
563
+ end
564
+
565
+ # definition of object properties in @context
566
+ sos_object_properties.each do |term|
567
+ object[term] = {}
568
+ object[term]["@id"] = domain + term
569
+ object[term]["@type"] = "@id"
570
+ end
571
+
572
+ sos_data_properties_string.each do |term|
573
+ object[term] = {}
574
+ object[term]["@id"] = domain + term
575
+ object[term]["@type"] = "http://www.w3.org/2001/XMLSchema#string"
576
+ end
577
+
578
+ sos_data_properties_integer.each do |term|
579
+ object[term] = {}
580
+ object[term]["@id"] = domain + term
581
+ object[term]["@type"] = "http://www.w3.org/2001/XMLSchema#integer"
582
+ end
583
+
584
+ sos_data_properties_float.each do |term|
585
+ object[term] = {}
586
+ object[term]["@id"] = domain + term
587
+ object[term]["@type"] = "http://www.w3.org/2001/XMLSchema#float"
588
+ end
589
+
590
+ object
591
+ end
592
+
593
+ def imported_keywords
594
+ {
595
+ "uo" => "http://purl.obolibrary.org/obo/",
596
+ "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
597
+ }
598
+ end
599
+
600
+ #
601
+ # definition of classes
602
+ #
603
+
604
+ def sos_class
605
+ [
606
+ sos_class_general,
607
+ sos_class_fastqc_modules,
608
+ sos_class_for_values,
609
+ ].flatten
610
+ end
611
+
612
+ def sos_class_general
613
+ [
614
+ "SequenceStatisticsReport",
615
+ "SequenceStatisticsMatrix",
616
+ "Row",
617
+ "ExactBaseStatistics",
618
+ "BaseRangeStatistics",
619
+ ]
620
+ end
621
+
622
+ def sos_class_fastqc_modules
623
+ [
624
+ "PerBaseSequenceQuality",
625
+ "PerTileSequenceQuality",
626
+ "PerSequnceQualityScores",
627
+ "PerBaseSequenceContent",
628
+ "PerSequenceGCContent",
629
+ "PerBaseNContent",
630
+ "SequenceLengthDistribution",
631
+ "SequenceDuplicationLevels",
632
+ "OverrepresentedSequences",
633
+ "KmerContent",
634
+ ]
635
+ end
636
+
637
+ def sos_class_for_values
638
+ [
639
+ "PhredQualityScore",
640
+ "NucleotideBaseContent",
641
+ "SequenceReadContent",
642
+ "SequenceReadLength",
643
+ "SequenceDuplicationLevel",
644
+ ]
645
+ end
646
+
647
+ #
648
+ # definition of predicates
649
+ #
650
+
651
+ def sos_object_properties
652
+ [
653
+ "hasMatrix",
654
+ "totalSequences",
655
+ "filteredSequences",
656
+ "sequenceLength",
657
+ "percentGC",
658
+ "hasRow",
659
+ "basePosition",
660
+ "kmerSequence",
661
+ "meanBaseCallQuality",
662
+ "medianBaseCallQuality",
663
+ "nCount",
664
+ "observedPerExpectedMax",
665
+ "observedPerExpectedMaxPosition",
666
+ "observedPerExpectedOverall",
667
+ "percentAdenine",
668
+ "percentCytosine",
669
+ "percentGC",
670
+ "percentGuanine",
671
+ "percentThymine",
672
+ "sequenceDuplicationLevel",
673
+ "sequenceReadCount",
674
+ "sequenceReadLength",
675
+ "sequenceReadPercentage",
676
+ "sequenceReadRelativeCount",
677
+ "hasUnit",
678
+ "overallMeanBaseCallQuality",
679
+ "overallMedianBaseCallQuality",
680
+ "overallNContent",
681
+ ]
682
+ end
683
+
684
+ def sos_data_properties_string
685
+ [
686
+ "filename",
687
+ "fileType",
688
+ "encoding",
689
+ "possibleSourceOfSequence",
690
+ "overrepresentedSequence",
691
+ ]
692
+ end
693
+
694
+ def sos_data_properties_integer
695
+ [
696
+ "rowIndex",
697
+ ]
698
+ end
699
+
700
+ def sos_data_properties_float
701
+ [
702
+ "baseCallQuality",
703
+ "baseCallQuality10thPercentile",
704
+ "baseCallQuality90thPercentile",
705
+ "baseCallQualityLowerQuartile",
706
+ "baseCallQualityUpperQuartile",
707
+ "minSequenceLength",
708
+ "maxSequenceLength",
709
+ "meanSequenceLength",
710
+ "medianSequenceLength",
711
+ ]
712
+ end
713
+ end
714
+ end
715
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-fastqc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tazro Inutano Ohta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-11 00:00:00.000000000 Z
11
+ date: 2016-03-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -44,6 +44,34 @@ dependencies:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
46
  version: 0.19.1
47
+ - !ruby/object:Gem::Dependency
48
+ name: json-ld
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '1.99'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '1.99'
61
+ - !ruby/object:Gem::Dependency
62
+ name: rdf-turtle
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.99'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.99'
47
75
  - !ruby/object:Gem::Dependency
48
76
  name: bundler
49
77
  requirement: !ruby/object:Gem::Requirement
@@ -160,16 +188,16 @@ dependencies:
160
188
  name: bio-fastqc
161
189
  requirement: !ruby/object:Gem::Requirement
162
190
  requirements:
163
- - - ">="
191
+ - - '='
164
192
  - !ruby/object:Gem::Version
165
- version: '0'
193
+ version: 0.3.0
166
194
  type: :development
167
195
  prerelease: false
168
196
  version_requirements: !ruby/object:Gem::Requirement
169
197
  requirements:
170
- - - ">="
198
+ - - '='
171
199
  - !ruby/object:Gem::Version
172
- version: '0'
200
+ version: 0.3.0
173
201
  description: ruby parser for FastQC, a quality control software for high-throughput
174
202
  sequencing data.
175
203
  email: inutano@gmail.com
@@ -194,8 +222,10 @@ files:
194
222
  - lib/bio-fastqc.rb
195
223
  - lib/bio/fastqc.rb
196
224
  - lib/bio/fastqc/cli.rb
225
+ - lib/bio/fastqc/converter.rb
197
226
  - lib/bio/fastqc/data.rb
198
227
  - lib/bio/fastqc/parser.rb
228
+ - lib/bio/fastqc/semantics.rb
199
229
  - spec/bio-fastqc_spec.rb
200
230
  - spec/example_fastqc.zip
201
231
  - spec/spec_helper.rb