rbbt 1.1.7 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.rdoc +2 -138
- metadata +72 -136
- data/LICENSE +0 -20
- data/bin/rbbt_config +0 -246
- data/install_scripts/classifier/R/classify.R +0 -36
- data/install_scripts/classifier/Rakefile +0 -145
- data/install_scripts/get_abner.sh +0 -2
- data/install_scripts/get_banner.sh +0 -25
- data/install_scripts/get_biocreative.sh +0 -72
- data/install_scripts/get_crf++.sh +0 -26
- data/install_scripts/get_entrez.sh +0 -4
- data/install_scripts/get_go.sh +0 -4
- data/install_scripts/get_polysearch.sh +0 -8
- data/install_scripts/ner/Rakefile +0 -206
- data/install_scripts/ner/config/default.rb +0 -52
- data/install_scripts/norm/Rakefile +0 -219
- data/install_scripts/norm/config/cue_default.rb +0 -10
- data/install_scripts/norm/config/tokens_default.rb +0 -79
- data/install_scripts/norm/functions.sh +0 -23
- data/install_scripts/organisms/Rakefile +0 -43
- data/install_scripts/organisms/cgd.Rakefile +0 -84
- data/install_scripts/organisms/human.Rakefile +0 -145
- data/install_scripts/organisms/mgi.Rakefile +0 -77
- data/install_scripts/organisms/pombe.Rakefile +0 -40
- data/install_scripts/organisms/rake-include.rb +0 -258
- data/install_scripts/organisms/rgd.Rakefile +0 -88
- data/install_scripts/organisms/sgd.Rakefile +0 -66
- data/install_scripts/organisms/tair.Rakefile +0 -54
- data/install_scripts/organisms/worm.Rakefile +0 -109
- data/install_scripts/wordlists/consonants +0 -897
- data/install_scripts/wordlists/stopwords +0 -1
- data/lib/rbbt.rb +0 -86
- data/lib/rbbt/bow/bow.rb +0 -88
- data/lib/rbbt/bow/classifier.rb +0 -116
- data/lib/rbbt/bow/dictionary.rb +0 -187
- data/lib/rbbt/ner/abner.rb +0 -34
- data/lib/rbbt/ner/banner.rb +0 -73
- data/lib/rbbt/ner/dictionaryNER.rb +0 -98
- data/lib/rbbt/ner/regexpNER.rb +0 -70
- data/lib/rbbt/ner/rner.rb +0 -227
- data/lib/rbbt/ner/rnorm.rb +0 -143
- data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
- data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
- data/lib/rbbt/sources/biocreative.rb +0 -75
- data/lib/rbbt/sources/biomart.rb +0 -105
- data/lib/rbbt/sources/entrez.rb +0 -211
- data/lib/rbbt/sources/go.rb +0 -40
- data/lib/rbbt/sources/organism.rb +0 -245
- data/lib/rbbt/sources/polysearch.rb +0 -117
- data/lib/rbbt/sources/pubmed.rb +0 -111
- data/lib/rbbt/util/arrayHash.rb +0 -255
- data/lib/rbbt/util/filecache.rb +0 -72
- data/lib/rbbt/util/index.rb +0 -47
- data/lib/rbbt/util/misc.rb +0 -106
- data/lib/rbbt/util/open.rb +0 -235
- data/lib/rbbt/util/rake.rb +0 -183
- data/lib/rbbt/util/simpleDSL.rb +0 -87
- data/lib/rbbt/util/tmpfile.rb +0 -19
- data/tasks/install.rake +0 -124
data/lib/rbbt/util/rake.rb
DELETED
@@ -1,183 +0,0 @@
|
|
1
|
-
require 'fileutils'
require 'rake'
require 'yaml'
|
2
|
-
|
3
|
-
# Include the step_def and step methods to simplify Pipelines. Steps depend on
|
4
|
-
# the step strictly above by default. The output of the step is saved marshaled,
|
5
|
-
# except for Strings which are saved as text. The input of the step, the output
|
6
|
-
# of the previous step if available, is accessed with the input method
|
7
|
-
#
|
8
|
-
# Example::
|
9
|
-
#
|
10
|
-
# step :text do
|
11
|
-
# "Text to revert"
|
12
|
-
# end
|
13
|
-
#
|
14
|
-
# step :revert do
|
15
|
-
# text = input
|
16
|
-
# text.reverse
|
17
|
-
# end
|
18
|
-
#
|
19
|
-
module Rake::Pipeline

  # Task being executed by the step that is currently running. It is nil
  # outside of a step, so +input+ can be called at any time without raising.
  @@current_task = nil

  # Keeps track of the steps that have been defined: their descriptions and
  # which step was defined last (the default dependency of the next one).
  module Rake::Pipeline::Step

    @step_descriptions = {}
    @last_step = nil

    class << self

      # Hash from each described step's rule regexp to its
      # "name: description" text.
      attr_reader :step_descriptions

      # Registers the description +message+ of +step+ under the regexp +re+.
      def add_description(re, step, message)
        @step_descriptions[re] = "#{ step }: #{ message }"
      end

      # Builds the argument for Rake's +rule+ for the step +name+.
      #
      # +dependencies+ may be nil (use the step defined just before this one,
      # if any), a String or Symbol (one step), an Array (several steps) or a
      # Proc (called with the target file name, returns the prerequisites).
      #
      # Returns the rule regexp alone when there is nothing to depend on, or
      # a Hash from the regexp to a Proc computing the prerequisites. Raises
      # ArgumentError for any other kind of +dependencies+.
      def step_def(name, dependencies = nil)
        # The name is escaped so that characters such as '.' or '+' in a step
        # name are matched literally.
        component = Regexp.escape(name.to_s)
        re = /(?:^|\/)#{component}\/.*$/
        step_dir = /(?:^|\/)#{component}\//

        # Take the last_description and associate it with the name
        description = Rake.application.last_description
        add_description(re, name, description) if description

        dependencies = @last_step if dependencies.nil? && !@last_step.nil?
        @last_step = name

        # Replace the step directory itself (the same path component the rule
        # matches), not the first place where the name happens to appear.
        swap = lambda do |filename, dep|
          filename.sub(step_dir) { |dir| dir.start_with?('/') ? "/#{ dep }/" : "#{ dep }/" }
        end

        # Generate the Hash definition
        case dependencies
        when nil
          re
        when String, Symbol
          { re => lambda { |filename| swap.call(filename, dependencies) } }
        when Array
          { re => lambda { |filename| dependencies.collect { |dep| swap.call(filename, dep) } } }
        when Proc
          { re => dependencies }
        else
          raise ArgumentError, "Unsupported dependencies for step #{ name }: #{ dependencies.inspect }"
        end
      end

    end
  end

  # Reads and writes the YAML info file that accompanies each step file.
  module Rake::Pipeline::Info

    # Path of the info file for +filename+: the directory holding the file is
    # replaced by '.info' and '.yaml' is appended to the file name.
    def self.info_file(filename)
      filename.sub(/^(.*?)(?:[^\/]*)\/([^\/]*)$/, '\1.info/\2.yaml')
    end

    # Returns the info stored for task +t+ (anything responding to +name+),
    # or an empty Hash if nothing has been saved yet.
    def self.load_info(t)
      info_filename = info_file(t.name)
      return {} unless File.exist?(info_filename)

      text = File.read(info_filename)
      # The file is written by save_info and may hold Symbol keys, which the
      # restricted YAML.load of Psych 4 rejects.
      # NOTE(review): this is a full YAML load; only use on trusted files.
      info = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(text) : YAML.load(text)
      info || {}
    end

    # Saves +info+ as YAML in the info file of task +t+, creating the
    # directory if needed. Returns the number of bytes written.
    def self.save_info(t, info = {})
      info_filename = info_file(t.name)

      FileUtils.mkdir_p File.dirname(info_filename)
      File.write(info_filename, YAML.dump(info))
    end

  end


  NON_ASCII_PRINTABLE = /[^\x20-\x7e\s]/

  # First two bytes of anything produced by Marshal.dump.
  MARSHAL_HEADER = [Marshal::MAJOR_VERSION, Marshal::MINOR_VERSION].pack('C2').freeze

  # Tells whether the first 1024 bytes of the open +file+ contain characters
  # other than printable ASCII and whitespace. The file is rewound afterwards.
  def is_binary?(file)
    binary = file.read(1024) =~ NON_ASCII_PRINTABLE
    file.rewind
    binary
  end

  # Tells whether the open +file+ starts with the Marshal header. The file is
  # rewound afterwards.
  def marshaled?(file)
    header = file.read(2)
    file.rewind
    header == MARSHAL_HEADER
  end
  private :marshaled?

  # Descriptions of the steps defined so far.
  def step_descriptions
    Rake::Pipeline::Step.step_descriptions
  end

  # See Rake::Pipeline::Step.step_def
  def step_def(*args)
    Rake::Pipeline::Step.step_def(*args)
  end

  # Opens the first prerequisite of task +t+ for reading and yields the file.
  def infile(t, &block)
    File.open(t.prerequisites.first) do |f|
      block.call(f)
    end
  end

  # Opens the file of task +t+ for writing and yields the file.
  def outfile(t, &block)
    File.open(t.name, 'w') do |f|
      block.call(f)
    end
  end

  # Loads the output of the previous step: the unmarshaled object if the
  # first prerequisite of +t+ holds marshaled data, its text otherwise.
  # Returns nil if +t+ has no prerequisites.
  def load_input(t)
    return nil if t.prerequisites.first.nil?

    infile(t) do |f|
      # Check for the Marshal header instead of "looks binary": text outputs
      # with non-ASCII characters (e.g. UTF-8) are not marshaled data.
      if marshaled?(f)
        f.binmode
        # NOTE(review): Marshal.load must only be used on trusted files.
        Marshal.load(f)
      else
        f.read
      end
    end
  end

  # Saves +output+ in the file of task +t+: Strings are written as text, any
  # other object is marshaled. Nothing is written if +output+ is nil.
  def save_output(t, output)
    case output
    when nil
      nil
    when String
      outfile(t) { |f| f.write output }
    else
      outfile(t) do |f|
        # Marshaled data is binary; avoid newline translation.
        f.binmode
        f.write Marshal.dump(output)
      end
    end
  end

  # We cannot load the input variable before the block.call, so we need another method

  # Load the input data from the previous step
  def input
    load_input(@@current_task) if @@current_task
  end

  # Add values to the info file of the current step and return the merged
  # info. Raises RuntimeError when called outside of a step.
  def info(values = {})
    raise 'info can only be used inside a step' if @@current_task.nil?

    merged = Rake::Pipeline::Info.load_info(@@current_task).merge(values)
    Rake::Pipeline::Info.save_info(@@current_task, merged)
    merged
  end


  # Define a new step, it depends on the previously defined by default. It
  # saves the output of the block so it can be loaded by the input method of
  # the next step
  def step(name, dependencies = nil, &block)
    rule step_def(name, dependencies) do |t|

      # Save the task object to be able to load the input
      @@current_task = t

      output = block.call(t)

      save_output(t, output)
    end

  end
end
|
179
|
-
|
180
|
-
# Manual smoke test: when this file is run directly, print the info file
# path computed for a sample step file.
if $PROGRAM_NAME == __FILE__
  sample_info_path = Rake::Pipeline::Info.info_file('work/diseases/t')
  puts sample_info_path.inspect
end
|
data/lib/rbbt/util/simpleDSL.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
require 'parse_tree_extensions'
|
2
|
-
require 'parse_tree'
|
3
|
-
require 'ruby2ruby'
|
4
|
-
|
5
|
-
# This class helps designing DSL in ruby based on method_missing. Class
|
6
|
-
# is initialized with a block of code or a file with the code, and it is
|
7
|
-
# given a method to be invoked instead of method missing. This class
|
8
|
-
# deals simply with making the method_missing alias and removing it and
|
9
|
-
# executing the block or file with code.
|
10
|
-
class SimpleDSL

  # Raised when the file holding the DSL code does not exist.
  class ConfigFileMissingError < StandardError; end

  private

  # Makes +method+ (:DSL_action by default) handle every missing method of
  # this object. The previous method_missing is kept under an alias, whose
  # name is pushed on a stack so that unhook_method can restore it.
  def hook_method(method = nil)
    method ||= :DSL_action
    # Class variables are used because local variables are not visible
    # inside the 'class << self' body below.
    @@restore_name = ("restore_DSL_" + method.to_s).to_sym
    @@method_name = method.to_sym

    class << self
      @restore_stack ||= []
      @restore_stack << @@restore_name
      alias_method(@@restore_name, :method_missing)
      alias_method(:method_missing, @@method_name)
    end
  end

  # Restores the method_missing that was in place before the last
  # hook_method.
  def unhook_method
    class << self
      alias_method(:method_missing, @restore_stack.pop)
    end
  end

  public

  # Executes the DSL code with +method+ in place of method_missing.
  # +actions+ is either a Proc or the path of a file with the code; the
  # block is used when +actions+ is not given. The code is remembered so
  # that +config+ can return it; for a Proc this requires Proc#to_ruby
  # (ParseTree/ruby2ruby) and is skipped when that is not available.
  #
  # The hook is removed even if the DSL code raises.
  def parse(method = nil, actions = nil, &block)

    actions ||= block

    hook_method(method)

    begin
      # Execute
      if actions.is_a? Proc

        if actions.respond_to?(:to_ruby)
          # Drop the first and last lines, which wrap the body of the block
          @config[@@method_name] = actions.to_ruby.lines.to_a[1..-2].to_a.join
        end

        instance_eval(&actions)
      elsif File.exist?(actions)

        # Read the file only once. No new local variable is introduced, as
        # locals of this method are visible to the evaluated code.
        @config[@@method_name] = File.read(actions)

        # NOTE(review): evaluates the file with full privileges; only use
        # on trusted configuration files.
        eval @config[@@method_name]
      end
    ensure
      unhook_method
    end

  end


  # Processes a DSL. +method+ is the name of the method executed instead
  # of method_missing. The code to be evaluated as a DSL is either
  # specified in +&block+ or in the file pointed by +file+.
  #
  # Raises ConfigFileMissingError if +file+ is given but does not exist.
  def initialize(method = nil, file = nil, &block)
    @config = {}
    if file
      raise ConfigFileMissingError, "File '#{ file }' is missing. Have you installed the config files? (use rbbt_config)." unless File.exist? file
      parse(method, file)
    end

    if block
      parse(method, block)
    end
  end

  # Returns the code with the DSL that was executed. If it came from a
  # block it was turned to string using ruby2ruby. +action+ is the name of
  # the method the code was parsed with (:DSL_action by default).
  def config(action = nil)
    if action
      @config[action.to_sym]
    else
      @config[:DSL_action]
    end
  end
end
|
86
|
-
|
87
|
-
|
data/lib/rbbt/util/tmpfile.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
require 'rbbt'
|
3
|
-
|
4
|
-
|
5
|
-
module TmpFile

  # Creates a random file name: the given prefix +s+ followed by a random
  # number below +max+. A new String is returned; +s+ is not modified.
  def self.random_name(s = "", max = 10000000)
    "#{ s }#{ rand(max) }"
  end

  # Creates a random filename in the temporary directory (Rbbt.tmpdir),
  # using +s+ and +max+ as in random_name.
  def self.tmp_file(s = "", max = 10000000)
    File.join(Rbbt.tmpdir, random_name(s, max))
  end
end
|
data/tasks/install.rake
DELETED
@@ -1,124 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
|
3
|
-
# Directory where the data is installed, and directory holding the install
# scripts shipped with the package.
$datadir = Rbbt.datadir
$scriptdir = File.join(Rbbt.rootdir, '/install_scripts')

# Runs the install script +script+ (a file in $scriptdir) inside +directory+
# after removing the previous content of that directory. Nothing is done if
# +marker+ already exists in +directory+, unless $force is set.
#
# The directory is emptied with FileUtils on its explicit path rather than
# with 'cd dir; rm -Rf *' in a shell, which deletes the content of the
# current directory whenever the cd fails (e.g. a path with spaces).
def install_from_script(directory, marker, script)
  return if File.exist?(File.join(directory, marker)) && !$force

  FileUtils.mkdir_p directory
  FileUtils.rm_rf Dir.glob(File.join(directory, '*'))

  command = File.join($scriptdir, script)
  Dir.chdir(directory) do
    # [command, command] runs the script directly, without shell parsing.
    # A failure is reported but does not abort, so the remaining tasks still run.
    system([command, command]) or warn "Install script failed: #{ command }"
  end
end

# Prepares the working directory +name+ under $datadir: copies +files+ from
# the directory of the same name in $scriptdir and creates the +subdirs+.
def install_workdir(name, files, subdirs)
  directory = File.join($datadir, name)
  FileUtils.mkdir_p directory

  files.each do |f|
    FileUtils.cp_r File.join($scriptdir, name, f), directory
  end

  subdirs.each do |d|
    FileUtils.mkdir_p File.join(directory, d)
  end
end


task 'abner' do
  install_from_script("#{$datadir}/third_party/abner/", 'abner.jar', 'get_abner.sh')
end

task 'banner' do
  install_from_script("#{$datadir}/third_party/banner/", 'banner.jar', 'get_banner.sh')
end

task 'crf++' do
  install_from_script("#{$datadir}/third_party/crf++/", 'ruby/CRFPP.so', 'get_crf++.sh')
end

task 'wordlists' do
  FileUtils.cp_r File.join($scriptdir, 'wordlists/'), $datadir
end

task 'polysearch' do
  install_from_script("#{$datadir}/dbs/polysearch/", 'disease.txt', 'get_polysearch.sh')
end

task '3party' => %w(abner banner crf++)

task 'entrez' do
  install_from_script("#{$datadir}/dbs/entrez/", 'gene_info', 'get_entrez.sh')
end

task 'go' do
  install_from_script("#{$datadir}/dbs/go/", 'gene_ontology.obo', 'get_go.sh')
end

task 'biocreative' do
  install_from_script("#{$datadir}/biocreative/", 'BC2GN', 'get_biocreative.sh')
end

task 'datasets' => %w(entrez biocreative)

# Copies the shared Rakefile and include file, gives each organism its own
# directory with its Rakefile, and then runs 'rake names' there.
task 'organisms' do
  directory = "#{$datadir}/organisms"
  FileUtils.mkdir_p directory

  %w(Rakefile rake-include.rb).each do |f|
    FileUtils.cp_r File.join($scriptdir, "organisms/#{ f }"), directory
  end

  Dir.glob(File.join($scriptdir, "organisms/*.Rakefile")).each do |f|
    org = File.basename(f, '.Rakefile')
    FileUtils.mkdir_p File.join(directory, org)
    FileUtils.cp f, File.join(directory, org, 'Rakefile')
  end

  Dir.chdir(directory) do
    system('rake', 'names') or warn "'rake names' failed in #{ directory }"
  end
end

task 'ner' do
  install_workdir('ner', %w(Rakefile config), %w(data model results))
end

task 'norm' do
  install_workdir('norm', %w(Rakefile config functions.sh), %w(results models))
end

task 'classifier' do
  install_workdir('classifier', %w(Rakefile R), %w(data model results))
end
|
124
|
-
|