bio-rdf 0.0.1.pre1 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -1
- data/README.md +53 -3
- data/VERSION +1 -1
- data/bin/bio-rdf +35 -4
- data/doc/design.md +20 -0
- data/features/parse_broad_gsea_cls.feature +18 -0
- data/features/parse_broad_gsea_cls.rb +13 -0
- data/features/parse_broad_gsea_results.feature +29 -0
- data/features/parse_broad_gsea_results.rb +59 -0
- data/features/support/env.rb +13 -0
- data/lib/bio-rdf.rb +2 -1
- data/lib/bio-rdf/parsers/gsea/broadgsea.rb +161 -0
- data/spec/spec_helper.rb +12 -0
- data/test/data/parsers/gsea/Run1_C2.SUMMARY.RESULTS.REPORT.0.txt +1066 -0
- data/test/data/parsers/gsea/Run1_C2.SUMMARY.RESULTS.REPORT.1.txt +474 -0
- metadata +62 -21
- data/README.rdoc +0 -46
data/Gemfile
CHANGED
@@ -2,13 +2,16 @@ source "http://rubygems.org"
|
|
2
2
|
# Add dependencies required to use your gem here.
|
3
3
|
# Example:
|
4
4
|
# gem "activesupport", ">= 2.3.5"
|
5
|
+
gem "bio-logger"
|
5
6
|
|
6
7
|
# Add dependencies to develop your gem here.
|
7
8
|
# Include everything needed to run rake, tests, features, etc.
|
8
9
|
group :development do
|
9
10
|
gem "shoulda", ">= 0"
|
11
|
+
gem "rspec", ">= 0"
|
12
|
+
gem "cucumber", ">= 0"
|
10
13
|
gem "rdoc", "~> 3.12"
|
11
|
-
gem "bundler", "
|
14
|
+
gem "bundler", ">= 1.0.0"
|
12
15
|
gem "jeweler", "~> 1.8.3"
|
13
16
|
gem "bio", ">= 1.4.2"
|
14
17
|
gem "rdoc", "~> 3.12"
|
data/README.md
CHANGED
@@ -2,9 +2,59 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://secure.travis-ci.org/pjotrp/bioruby-rdf.png)](http://travis-ci.org/pjotrp/bioruby-rdf)
|
4
4
|
|
5
|
-
|
5
|
+
Library and tools for using a triple-store with biological data. It
|
6
|
+
includes tools for storing parsed data into a triple store. The name
|
7
|
+
includes RDF, the XML representation of triples, but that really is
|
8
|
+
too narrow a view of the purpose of this biogem. The alternative names
|
9
|
+
(bio-semweb and bio-triplestore) looked even worse.
|
6
10
|
|
7
|
-
|
11
|
+
Every data-type has a Parser module. This parser module controls the
|
12
|
+
parsing flow. The actual parsing is handled by lower level routines,
|
13
|
+
which may even reside in other libraries, such as BioRuby. The basic
|
14
|
+
flow is
|
15
|
+
|
16
|
+
input -> parse -> output
|
17
|
+
|
18
|
+
The *input* can be anything, from directories, files to web based
|
19
|
+
resources.
|
20
|
+
|
21
|
+
The *output* of the parser should be in some form of triple format,
|
22
|
+
though simple tab delimited tables can also be supported (depending on
|
23
|
+
the parser).
|
24
|
+
|
25
|
+
The first functionality includes parsing the results of gene set
|
26
|
+
enrichment analysis
|
27
|
+
([GSEA](http://www.broadinstitute.org/gsea/index.jsp)) into triples
|
28
|
+
(more below).
|
29
|
+
|
30
|
+
This project is linked with next generation sequencing, genome
|
31
|
+
browsing, visualisation and QTL mapping. E.g.
|
32
|
+
|
33
|
+
* [bio-ngs](http://www.biogems.info/#bio-ngs)
|
34
|
+
* [bio-ucsc-api](http://www.biogems.info/#bio-ucsc-api)
|
35
|
+
* [bio-qtlHD](http://www.biogems.info/#bio-qtlHD)
|
36
|
+
|
37
|
+
Note: this software is under active development! See also the [design
|
38
|
+
doc](https://github.com/pjotrp/bioruby-rdf/blob/master/doc/design.md).
|
39
|
+
|
40
|
+
## Examples
|
41
|
+
|
42
|
+
### Gene set enrichment analysis (GSEA)
|
43
|
+
|
44
|
+
GSEA is a computational method that determines whether an a priori
|
45
|
+
defined set of genes shows statistically significant, concordant
|
46
|
+
differences between two biological states. The [GSEA
|
47
|
+
tool](http://www.broadinstitute.org/gsea/index.jsp) produces two
|
48
|
+
result files for every two biological states. We wrote a parser
|
49
|
+
for the summary files, which outputs a single table of results
|
50
|
+
(based on a cut-off value). This table can be converted into a
|
51
|
+
triple-store.
|
52
|
+
|
53
|
+
To create a tab delimited file from a GSEA result, where FDR <= 0.25:
|
54
|
+
|
55
|
+
```bash
|
56
|
+
bio-rdf gsea --tabulate --exec "rec.fdr <= 0.25" ./gsea/output/ > results.txt
|
57
|
+
```
|
8
58
|
|
9
59
|
## Installation
|
10
60
|
|
@@ -15,7 +65,7 @@ Note: this software is under active development!
|
|
15
65
|
## Usage
|
16
66
|
|
17
67
|
```ruby
|
18
|
-
require 'bio-rdf
|
68
|
+
require 'bio-rdf'
|
19
69
|
```
|
20
70
|
|
21
71
|
The API doc is online. For more code examples see the test files in
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.1
|
data/bin/bio-rdf
CHANGED
@@ -4,7 +4,24 @@
|
|
4
4
|
# Author:: Pjotr Prins
|
5
5
|
# Copyright:: 2012
|
6
6
|
|
7
|
-
|
7
|
+
rootpath = File.dirname(File.dirname(__FILE__))
|
8
|
+
$: << File.join(rootpath,'lib')
|
9
|
+
|
10
|
+
version = File.new(File.join(rootpath,'VERSION')).read.chomp
|
11
|
+
|
12
|
+
print "bio-rdf #{version} by Pjotr Prins (c) 2012\n"
|
13
|
+
|
14
|
+
USAGE = <<EOP
|
15
|
+
|
16
|
+
Usage: bio-rdf command [options]
|
17
|
+
|
18
|
+
Valid commands reflect parsers and are:
|
19
|
+
|
20
|
+
gsea : Gene set enrichment analysis
|
21
|
+
|
22
|
+
For more information on a command use the --help switch
|
23
|
+
|
24
|
+
EOP
|
8
25
|
|
9
26
|
if ARGV.size == 0
|
10
27
|
print USAGE
|
@@ -12,11 +29,20 @@ end
|
|
12
29
|
|
13
30
|
require 'bio-rdf'
|
14
31
|
require 'optparse'
|
32
|
+
require 'ostruct'
|
15
33
|
|
16
34
|
# Uncomment when using the bio-logger
|
17
|
-
|
18
|
-
|
19
|
-
|
35
|
+
require 'bio-logger'
|
36
|
+
|
37
|
+
Bio::Log::CLI.logger('stderr')
|
38
|
+
Bio::Log::CLI.trace('info')
|
39
|
+
|
40
|
+
case ARGV[0]
|
41
|
+
when 'gsea'
|
42
|
+
ARGV.shift
|
43
|
+
BioRdf::Parsers::BroadGSEA::Parser::handle_options
|
44
|
+
exit 0
|
45
|
+
end
|
20
46
|
|
21
47
|
options = {:example_switch=>false,:show_help=>false}
|
22
48
|
opts = OptionParser.new do |o|
|
@@ -64,6 +90,11 @@ end
|
|
64
90
|
begin
|
65
91
|
opts.parse!(ARGV)
|
66
92
|
|
93
|
+
if options[:show_help]
|
94
|
+
print USAGE
|
95
|
+
exit 0
|
96
|
+
end
|
97
|
+
|
67
98
|
# Uncomment the following when using the bio-logger
|
68
99
|
# Bio::Log::CLI.configure('bio-rdf')
|
69
100
|
|
data/doc/design.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Semantic web for BioRuby!
|
2
|
+
|
3
|
+
In this document we describe using a triple store for bioinformatics,
|
4
|
+
mostly using Ruby. While the semantic web is still, mostly, vapourware in
|
5
|
+
biology, the ideas and tools can be very useful for reasoning about
|
6
|
+
relationships between genes, pathways, enrichment etc. In this library
|
7
|
+
we aim to use a local triple store, feed it with information, query it
|
8
|
+
using [SPARQL](http://en.wikipedia.org/wiki/SPARQL), and provide it
|
9
|
+
with a nice user interface for biologists. Triples may link-out to
|
10
|
+
other semantic web connections.
|
11
|
+
|
12
|
+
Enjoy,
|
13
|
+
|
14
|
+
Pjotr Prins
|
15
|
+
|
16
|
+
## Loading the triple store
|
17
|
+
|
18
|
+
## Querying the triple store
|
19
|
+
|
20
|
+
## User interface
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Feature: Parse GSEA cls file
|
2
|
+
To get the phenotype class in a Broad Institute GSEA result
|
3
|
+
we need to parse the CLS file:
|
4
|
+
Categorical (e.g. tumor vs normal) class file format (*.cls)
|
5
|
+
|
6
|
+
The CLS file format defines phenotype (class or template) labels and
|
7
|
+
associates each sample in the expression data with a label. The CLS file
|
8
|
+
format uses spaces or tabs to separate the fields.
|
9
|
+
|
10
|
+
Scenario: Parse CLS file
|
11
|
+
Given I have a CLS file which contains
|
12
|
+
"""
|
13
|
+
26 2 1
|
14
|
+
# RS13482013 RS13482013_1
|
15
|
+
0 0 0 1 1 0 1 0 0 1 0 1 0 1 1 1 0 1 1 1 1 1 0 0 1 0
|
16
|
+
"""
|
17
|
+
Then I should fetch the phenotype names RS13482013 and RS13482013_1
|
18
|
+
And I should be able to fetch the classes into an array
|
@@ -0,0 +1,13 @@
|
|
1
|
+
Given /^I have a CLS file which contains$/ do |buf|
|
2
|
+
@rec = BioRdf::Parsers::BroadGSEA::ParseClsRecord.new(buf)
|
3
|
+
end
|
4
|
+
|
5
|
+
Then /^I should fetch the phenotype names RS(\d+) and RS(\d+)_(\d+)$/ do |arg1, arg2, arg3|
|
6
|
+
@rec.classnames.should == ['RS13482013','RS13482013_1']
|
7
|
+
end
|
8
|
+
|
9
|
+
Then /^I should be able to fetch the classes into an array$/ do
|
10
|
+
@rec.classes.should ==
|
11
|
+
["0", "0", "0", "1", "1", "0", "1", "0", "0", "1", "0", "1", "0", "1", "1", "1", "0", "1", "1", "1", "1", "1", "0", "0", "1", "0"]
|
12
|
+
end
|
13
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
Feature: Parse GSEA results
|
2
|
+
To get the enrichment values in a Broad Institute GSEA result file
|
3
|
+
we need to parse the tab delimited results file. An example is
|
4
|
+
|
5
|
+
GS SIZE SOURCE ES NES NOM p-val FDR q-val FWER p-val Tag \% Gene \% Signal FDR (median) glob.p.val
|
6
|
+
BIOCARTA_RACCYCD_PATHWAY 25 http://www.broadinstitute.org/gsea/msigdb/cards/BIOCARTA_RACCYCD_PATHWAY.html 0.55588 1.7947 0.004149 1 0.647 0.44 0.198 0.354 1 0.633
|
7
|
+
REACTOME_MRNA_3_END_PROCESSING 31 http://www.broadinstitute.org/gsea/msigdb/cards/REACTOME_MRNA_3_END_PROCESSING.html 0.6396 1.7613 0 1 0.752 0.613 0.242 0.466 1 0.579
|
8
|
+
(...)
|
9
|
+
|
10
|
+
Scenario: Parse one line in a Broad GSEA results file
|
11
|
+
Given I have a Broad GSEA results file which contains the line
|
12
|
+
"""
|
13
|
+
BIOCARTA_RACCYCD_PATHWAY 25 http://www.broadinstitute.org/gsea/msigdb/cards/BIOCARTA_RACCYCD_PATHWAY.html 0.55588 1.7947 0.004149 1 0.647 0.44 0.198 0.354 1 0.633
|
14
|
+
"""
|
15
|
+
Then I should be able to the name of the geneset BIOCARTA_RACCYCD_PATHWAY
|
16
|
+
And I should be able to fetch all values as a list
|
17
|
+
And I should be able to fetch all other values (lazily), where
|
18
|
+
And I should be able to fetch the source
|
19
|
+
And ES is 0.55588
|
20
|
+
And NES is 1.7947
|
21
|
+
And p-value is 0.004149
|
22
|
+
And FDR is 1
|
23
|
+
And global p-value is 0.633
|
24
|
+
And Median FDR is 1
|
25
|
+
|
26
|
+
Scenario: Parse a Broad GSEA results file and filter results
|
27
|
+
Given I have a Broad GSEA results file with multiple lines
|
28
|
+
Then I should be able to return all records with an FDR of less than 0.25
|
29
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
Given /^I have a Broad GSEA results file which contains the line$/ do |string|
|
2
|
+
@rec = BioRdf::Parsers::BroadGSEA::ParseResultRecord.new(string.gsub(/\s+/,"\t"))
|
3
|
+
end
|
4
|
+
|
5
|
+
Then /^I should be able to fetch all values as a list$/ do
|
6
|
+
@rec.to_list.should == ["BIOCARTA_RACCYCD_PATHWAY", "25", "http://www.broadinstitute.org/gsea/msigdb/cards/BIOCARTA_RACCYCD_PATHWAY.html", "0.55588", "1.7947", "0.004149", "1", "0.647", "0.44", "0.198", "0.354", "1", "0.633"]
|
7
|
+
end
|
8
|
+
|
9
|
+
Then /^I should be able to fetch all other values \(lazily\), where$/ do
|
10
|
+
end
|
11
|
+
|
12
|
+
Then /^I should be able to the name of the geneset BIOCARTA_RACCYCD_PATHWAY$/ do
|
13
|
+
@rec.geneset_name.should == "BIOCARTA_RACCYCD_PATHWAY"
|
14
|
+
end
|
15
|
+
|
16
|
+
Then /^I should be able to fetch the source$/ do
|
17
|
+
@rec.source.should == "http://www.broadinstitute.org/gsea/msigdb/cards/BIOCARTA_RACCYCD_PATHWAY.html"
|
18
|
+
end
|
19
|
+
|
20
|
+
Then /^ES is (\d+)\.(\d+)$/ do |arg1, arg2|
|
21
|
+
@rec.es.should == (arg1+'.'+arg2).to_f
|
22
|
+
end
|
23
|
+
|
24
|
+
Then /^NES is (\d+)\.(\d+)$/ do |arg1, arg2|
|
25
|
+
@rec.nes.should == (arg1+'.'+arg2).to_f
|
26
|
+
end
|
27
|
+
|
28
|
+
Then /^p\-value is (\d+)\.(\d+)$/ do |arg1, arg2|
|
29
|
+
@rec.nominal_p_value.should == (arg1+'.'+arg2).to_f
|
30
|
+
end
|
31
|
+
|
32
|
+
Then /^FDR is (\d+)$/ do |arg1|
|
33
|
+
@rec.fdr.should == (arg1).to_f
|
34
|
+
end
|
35
|
+
|
36
|
+
Then /^q\-value is (\d+)\.(\d+)$/ do |arg1, arg2|
|
37
|
+
@rec.fdr_q_value.should == (arg1+'.'+arg2).to_f
|
38
|
+
end
|
39
|
+
|
40
|
+
Then /^global p\-value is (\d+)\.(\d+)$/ do |arg1, arg2|
|
41
|
+
@rec.global_p_value.should == (arg1+'.'+arg2).to_f
|
42
|
+
end
|
43
|
+
|
44
|
+
Then /^Median FDR is (\d+)$/ do |arg1|
|
45
|
+
@rec.median_fdr.should == (arg1).to_f
|
46
|
+
end
|
47
|
+
|
48
|
+
# --- multi line parsing
|
49
|
+
|
50
|
+
Given /^I have a Broad GSEA results file with multiple lines$/ do
|
51
|
+
@gsea_results = BioRdf::Parsers::BroadGSEA::ParseResultFile.new("./test/data/parsers/gsea/Run1_C2.SUMMARY.RESULTS.REPORT.0.txt")
|
52
|
+
end
|
53
|
+
|
54
|
+
# NOTE(review): the threshold captured from the step text (arg1, arg2) is not
# used below; the cut-off 0.85 and the expected count 70 are hard-coded, while
# the scenario asks for an FDR of less than 0.25. Confirm the intended
# threshold and expected count against the test data before relying on this step.
Then /^I should be able to return all records with an FDR of less than (\d+)\.(\d+)$/ do |arg1, arg2|
|
55
|
+
recs = @gsea_results.find_all { | rec | rec.fdr_q_value < 0.85 }
|
56
|
+
recs.size.should == 70
|
57
|
+
end
|
58
|
+
|
59
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
begin
|
3
|
+
Bundler.setup(:default, :development)
|
4
|
+
rescue Bundler::BundlerError => e
|
5
|
+
$stderr.puts e.message
|
6
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
+
exit e.status_code
|
8
|
+
end
|
9
|
+
|
10
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
|
11
|
+
require 'bio-rdf'
|
12
|
+
|
13
|
+
require 'rspec/expectations'
|
data/lib/bio-rdf.rb
CHANGED
@@ -0,0 +1,161 @@
|
|
1
|
+
module BioRdf
|
2
|
+
module Parsers
|
3
|
+
module BroadGSEA
|
4
|
+
|
5
|
+
# Parses a 3 line CLS record (see features for an example)
|
6
|
+
class ParseClsRecord
  # classnames:: the (up to) two names that follow the "#" on the second line
  # classes::    the whitespace separated labels of the third line, as Strings
  attr_reader :classnames, :classes

  # Parses a CLS record held in +buf+.
  #
  # The record is expected to consist of three non-blank lines:
  #   1. a header line (not interpreted here)
  #   2. a line starting with "#" followed by the class names
  #   3. a line with one class label per sample
  #
  # Surrounding whitespace on each line, CRLF line endings and blank lines
  # are ignored, so a record pasted with indentation or a trailing newline
  # parses the same as a tight one.
  #
  # Raises RuntimeError when there are not exactly three non-blank lines or
  # when the second line does not start with "#".
  def initialize buf
    lines = buf.split(/\r?\n/).map { |line| line.strip }.reject { |line| line.empty? }
    raise "CLS record should be 3 lines" if lines.size != 3
    classline = lines[1]
    # start_with? rather than classline[0]: on Ruby 1.8 String#[] with an
    # Integer index returns a character code, which never equals "#".
    raise "Second line should start with #" unless classline.start_with?("#")
    @classnames = classline.split(/\s+/)[1..2]
    @classes = lines[2].split(/\s+/)
  end
end
|
17
|
+
|
18
|
+
# Parses a single line result lazily (see features for an example)
|
19
|
+
#
|
20
|
+
# GS SIZE SOURCE ES NES NOM-p-val FDR-q-val FWER-p-val Tag% Gene% Signal FDR_(median) glob.p.val
|
21
|
+
class ParseResultRecord
  # string:: one tab delimited line of a results file; leading and trailing
  #          whitespace is stripped before splitting.
  #
  # Numeric accessors convert on first use and cache the result. A column
  # that is absent from the line reads as 0 / 0.0, because nil.to_i and
  # nil.to_f return zero.
  def initialize string
    @fields = string.strip.split(/\t/)
  end

  # All column values as Strings, in file order.
  def to_list
    @fields
  end

  # GS column: name of the gene set.
  def geneset_name
    @fields[0]
  end

  # SIZE column as an Integer.
  def size
    @size ||= @fields[1].to_i
  end

  # SOURCE column (a URL in the example data).
  def source
    @fields[2]
  end

  # ES: Enrichment score for the gene set; that is, the degree to which
  # this gene set is overrepresented at the top or bottom of the ranked
  # list of genes in the expression dataset.
  def es
    @es ||= @fields[3].to_f
  end

  # NES: Normalized enrichment score; that is, the enrichment score for
  # the gene set after it has been normalized across analyzed gene sets.
  def nes
    @nes ||= @fields[4].to_f
  end

  # NOM p-value: Nominal p value; that is, the statistical significance
  # of the enrichment score. The nominal p value is not adjusted for gene
  # set size or multiple hypothesis testing; therefore, it is of limited
  # use in comparing gene sets.
  def nominal_p_value
    @nominal_p_value ||= @fields[5].to_f
  end

  # FDR q-value: False discovery rate; that is, the estimated probability
  # that the normalized enrichment score (NES) represents a false
  # positive finding. For example, an FDR of 25% indicates that the
  # result is likely to be valid 3 out of 4 times.
  def fdr_q_value
    @fdr_q_value ||= @fields[6].to_f
  end
  alias :fdr :fdr_q_value

  # FWER p-value: Familywise-error rate; that is, a more conservatively
  # estimated probability that the normalized enrichment score represents
  # a false positive finding. Because the goal of GSEA is to generate
  # hypotheses, the GSEA team recommends focusing on the FDR statistic.
  def fwer_p_value
    @fwer_p_value ||= @fields[7].to_f
  end

  # "Tag %" column as a Float.
  def tag_percent
    @tag_percent ||= @fields[8].to_f
  end

  # "Gene %" column as a Float.
  def gene_percent
    @gene_percent ||= @fields[9].to_f
  end

  # Signal column as a Float.
  def signal
    @signal ||= @fields[10].to_f
  end

  # "FDR (median)" column as a Float.
  def median_fdr
    @median_fdr ||= @fields[11].to_f
  end

  # glob.p.val column as a Float.
  def global_p_value
    @global_p_value ||= @fields[12].to_f
  end
end
|
78
|
+
|
79
|
+
# Reads a tab delimited results file into memory and exposes its records
# through Enumerable.
class ParseResultFile
  include Enumerable

  # filename:: path of the results file. The first line is treated as the
  #            header and skipped; every following non-blank line becomes a
  #            ParseResultRecord.
  #
  # Blank lines are skipped: a record built from an empty line has no
  # columns, so its numeric accessors would read 0.0 and it would pass any
  # "FDR below threshold" filter.
  def initialize filename
    @list = []
    # Block form so the handle is closed even when a record fails to parse.
    File.open(filename) do |f|
      f.gets # skip header
      f.each_line do |line|
        next if line.strip.empty?
        @list << ParseResultRecord.new(line)
      end
    end
  end

  # Yields every record in file order. Without a block an Enumerator is
  # returned.
  def each
    return enum_for(:each) unless block_given?
    @list.each { |rec| yield rec }
  end
end
|
95
|
+
|
96
|
+
# Command line front end for the Broad GSEA parsers: reads the options of
# the "gsea" command and prints the selected result records as a tab
# delimited table on stdout.
module Parser

  # Filter applied when no --exec expression is given.
  DEFAULT_FILTER = "rec.fdr_q_value <= 0.25"

  # Parses the options in ARGV (after the command name has been shifted off)
  # and runs do_parse on the directory given as first remaining argument.
  #
  # -h/--help prints the option summary and exits.
  # Raises RuntimeError when no existing directory is supplied.
  def Parser::handle_options
    options = OpenStruct.new()

    opts = OptionParser.new() do |o|
      o.banner = "Usage: #{File.basename($0)} gsea [options] dir"

      o.on_tail("-h", "--help", "Show help and examples") {
        print(o)
        exit()
      }
      o.on("-e filter","--exec filter",String, "Execute filter") do |s|
        options.exec = s
      end

      o.on("--tabulate","Output tab delimited table") do
        options.output = :tabulate
      end

    end
    opts.parse!(ARGV)
    dir = ARGV[0]
    if dir && File.directory?(dir)
      do_parse(dir, options.exec, options.output)
    else
      raise "you should supply a GSEA directory!"
    end
  end

  require 'bio-logger'
  include Bio::Log

  # Walks every sub directory of +input+, parses the
  # "*SUMMARY.RESULTS.REPORT.[01].txt" files found there and prints the
  # records selected by +filter+ as tab delimited rows, preceded by one
  # header row.
  #
  # input::  directory that holds one sub directory per GSEA run
  # filter:: Ruby expression (String) evaluated for every record with the
  #          record bound to +rec+, e.g. "rec.fdr <= 0.25"; nil selects
  #          DEFAULT_FILTER
  # output:: requested output format; only the tab delimited table is
  #          produced, so the value is currently not consulted
  #
  # Each row is prefixed with a marker (first class name of the "cls" file
  # in the same directory, or "unknown" when there is none) and a genotype
  # ("A" for the .0.txt report, "B" for the .1.txt report).
  def Parser::do_parse input, filter, output
    log = LoggerPlus.new 'gsea'
    log.level = INFO
    log.outputters = Outputter.stderr
    log.warn("Fetching "+input)

    # SECURITY: the filter is arbitrary Ruby typed by the person running the
    # tool and is evaluated as such. Never pass text from an untrusted
    # source into this parameter.
    begin
      selector = eval("lambda { |rec| #{filter || DEFAULT_FILTER} }")
    rescue SyntaxError => e
      raise "invalid filter expression: #{e.message}"
    end

    print "Marker\tGenotype\tGS\tSIZE\tSOURCE\tES\tNES\tNOM p-val\tFDR q-val\tFWER p-val\tTag \%\tGene \%\tSignal\tFDR (median)\tglob.p.val\n"
    Dir.foreach(input) do |entry| # two step search, because of many dirs
      next if entry == '.' or entry == '..'
      log.info("Parsing directory "+entry)
      resultfilenames = File.join(input,entry,"*SUMMARY.RESULTS.REPORT.[01].txt")
      clsfilename = File.join(input,entry,"cls")
      # The marker is the same for every report in this directory; it is
      # looked up on the first report only.
      marker = nil
      Dir.glob(resultfilenames) do |fn|
        # Anchored at the end of the path so that directory names cannot
        # influence the genotype.
        genotype = (fn =~ /\.1\.txt\z/) ? "B" : "A"
        marker ||=
          if File.exist?(clsfilename)
            BioRdf::Parsers::BroadGSEA::ParseClsRecord.new(File.read(clsfilename)).classnames[0]
          else
            "unknown"
          end
        gsea_results = BioRdf::Parsers::BroadGSEA::ParseResultFile.new(fn)
        gsea_results.find_all { |rec| selector.call(rec) }.each do |rec|
          print "#{marker}\t#{genotype}\t"+rec.to_list.join("\t"),"\n"
        end
      end
    end
  end
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|