rbbt 1.1.7 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.rdoc +2 -138
- metadata +72 -136
- data/LICENSE +0 -20
- data/bin/rbbt_config +0 -246
- data/install_scripts/classifier/R/classify.R +0 -36
- data/install_scripts/classifier/Rakefile +0 -145
- data/install_scripts/get_abner.sh +0 -2
- data/install_scripts/get_banner.sh +0 -25
- data/install_scripts/get_biocreative.sh +0 -72
- data/install_scripts/get_crf++.sh +0 -26
- data/install_scripts/get_entrez.sh +0 -4
- data/install_scripts/get_go.sh +0 -4
- data/install_scripts/get_polysearch.sh +0 -8
- data/install_scripts/ner/Rakefile +0 -206
- data/install_scripts/ner/config/default.rb +0 -52
- data/install_scripts/norm/Rakefile +0 -219
- data/install_scripts/norm/config/cue_default.rb +0 -10
- data/install_scripts/norm/config/tokens_default.rb +0 -79
- data/install_scripts/norm/functions.sh +0 -23
- data/install_scripts/organisms/Rakefile +0 -43
- data/install_scripts/organisms/cgd.Rakefile +0 -84
- data/install_scripts/organisms/human.Rakefile +0 -145
- data/install_scripts/organisms/mgi.Rakefile +0 -77
- data/install_scripts/organisms/pombe.Rakefile +0 -40
- data/install_scripts/organisms/rake-include.rb +0 -258
- data/install_scripts/organisms/rgd.Rakefile +0 -88
- data/install_scripts/organisms/sgd.Rakefile +0 -66
- data/install_scripts/organisms/tair.Rakefile +0 -54
- data/install_scripts/organisms/worm.Rakefile +0 -109
- data/install_scripts/wordlists/consonants +0 -897
- data/install_scripts/wordlists/stopwords +0 -1
- data/lib/rbbt.rb +0 -86
- data/lib/rbbt/bow/bow.rb +0 -88
- data/lib/rbbt/bow/classifier.rb +0 -116
- data/lib/rbbt/bow/dictionary.rb +0 -187
- data/lib/rbbt/ner/abner.rb +0 -34
- data/lib/rbbt/ner/banner.rb +0 -73
- data/lib/rbbt/ner/dictionaryNER.rb +0 -98
- data/lib/rbbt/ner/regexpNER.rb +0 -70
- data/lib/rbbt/ner/rner.rb +0 -227
- data/lib/rbbt/ner/rnorm.rb +0 -143
- data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
- data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
- data/lib/rbbt/sources/biocreative.rb +0 -75
- data/lib/rbbt/sources/biomart.rb +0 -105
- data/lib/rbbt/sources/entrez.rb +0 -211
- data/lib/rbbt/sources/go.rb +0 -40
- data/lib/rbbt/sources/organism.rb +0 -245
- data/lib/rbbt/sources/polysearch.rb +0 -117
- data/lib/rbbt/sources/pubmed.rb +0 -111
- data/lib/rbbt/util/arrayHash.rb +0 -255
- data/lib/rbbt/util/filecache.rb +0 -72
- data/lib/rbbt/util/index.rb +0 -47
- data/lib/rbbt/util/misc.rb +0 -106
- data/lib/rbbt/util/open.rb +0 -235
- data/lib/rbbt/util/rake.rb +0 -183
- data/lib/rbbt/util/simpleDSL.rb +0 -87
- data/lib/rbbt/util/tmpfile.rb +0 -19
- data/tasks/install.rake +0 -124
data/lib/rbbt/util/rake.rb
DELETED
@@ -1,183 +0,0 @@
|
|
1
|
-
require 'fileutils'
require 'rake'
require 'yaml'
|
2
|
-
|
3
|
-
# Include the step_def and step methods to simplify Pipelines. Steps depend on
|
4
|
-
# the step strictly above by default. The output of the step is saved marshaled,
|
5
|
-
# except for Strings which are saved as text. The input of the step, the output
|
6
|
-
# of the previous step if available, is accessed with the input method.
|
7
|
-
#
|
8
|
-
# Example::
|
9
|
-
#
|
10
|
-
# step :text do
|
11
|
-
# "Text to revert"
|
12
|
-
# end
|
13
|
-
#
|
14
|
-
# step :revert do
|
15
|
-
# text = input
|
16
|
-
# text.reverse
|
17
|
-
# end
|
18
|
-
#
|
19
|
-
module Rake
  # File-based pipelines on top of Rake rules. Every step owns a directory
  # named after the step; a file "<dir>/<step>/<job>" is produced from the
  # file with the same job name in the directory of the step it depends on.
  module Pipeline

    # Task being executed by the rule created in +step+; nil outside a step.
    @@current_task = nil

    # Matches any character that is neither printable ASCII nor whitespace.
    NON_ASCII_PRINTABLE = /[^\x20-\x7e\s]/

    # Bookkeeping for step definitions: descriptions and the implicit
    # "depends on the previously defined step" chaining.
    module Step
      # Rule regexp => "<step>: <description>".
      @@step_descriptions = {}
      # Name given to the most recent step_def call.
      @@last_step = nil

      # Returns the Hash of rule regexp => description string.
      def self.step_descriptions
        @@step_descriptions
      end

      # Records +message+ as the description of +step+ under the key +re+.
      def self.add_description(re, step, message)
        @@step_descriptions[re] = "#{ step }: #{ message }"
      end

      # Builds the argument for Rake's +rule+ for the step +name+.
      #
      # +dependencies+ may be nil (use the previously defined step, if any),
      # a String/Symbol (a single step), an Array of steps, or a Proc that
      # is handed to Rake unchanged. Returns the bare rule Regexp when the
      # step has no dependencies, a one-entry Hash {regexp => proc}
      # otherwise, and nil for any other kind of +dependencies+.
      def self.step_def(name, dependencies = nil)
        # Escape the name so characters such as "+" or "." are matched literally.
        re = Regexp.new(/(?:^|\/)#{Regexp.escape(name.to_s)}\/.*$/)

        # Take the last_description and associate it with the name
        description = Rake.application.last_description
        add_description(re, name, description) if description

        dependencies = @@last_step if dependencies.nil? && !@@last_step.nil?
        @@last_step = name

        # Generate the Hash definition
        case dependencies
        when nil
          re
        when String, Symbol
          { re => lambda { |filename| swap_step(filename, name, dependencies) } }
        when Array
          { re => lambda { |filename| dependencies.collect { |dep| swap_step(filename, name, dep) } } }
        when Proc
          { re => dependencies }
        end
      end

      # Replaces the first path component equal to +name+ with +dependency+.
      # Matching whole components keeps directories that merely contain the
      # step name (e.g. "reverts/") untouched.
      def self.swap_step(filename, name, dependency)
        filename.sub(/(^|\/)#{Regexp.escape(name.to_s)}\//) do
          "#{ Regexp.last_match(1) }#{ dependency }/"
        end
      end
      private_class_method :swap_step
    end

    # Per-job side information, stored as YAML next to the step directories.
    module Info

      # Maps "<base>/<step>/<job>" to "<base>/.info/<job>.yaml".
      def self.info_file(filename)
        filename.sub(/^(.*?)(?:[^\/]*)\/([^\/]*)$/, '\1.info/\2.yaml')
      end

      # Loads the info Hash for task +t+ (anything responding to +name+).
      # Returns an empty Hash when the info file is missing or empty.
      def self.load_info(t)
        info_filename = info_file(t.name)
        return {} unless File.exist?(info_filename)

        # Block form closes the file; an empty document loads as a non-Hash.
        File.open(info_filename) { |file| YAML.load(file) } || {}
      end

      # Writes +info+ as YAML to the info file of task +t+, creating the
      # containing directory when needed.
      def self.save_info(t, info = {})
        info_filename = info_file(t.name)

        FileUtils.mkdir_p File.dirname(info_filename)
        File.open(info_filename, 'w') { |file| file.write YAML.dump(info) }
      end
    end

    # Inspects up to the first 1024 bytes of the IO +file+ and rewinds it.
    # Returns the offset of the first byte that is neither printable ASCII
    # nor whitespace, or nil when there is none (including an empty file).
    def is_binary?(file)
      chunk = file.read(1024)
      file.rewind
      chunk && chunk =~ NON_ASCII_PRINTABLE
    end

    # See Rake::Pipeline::Step.step_descriptions.
    def step_descriptions
      Rake::Pipeline::Step.step_descriptions
    end

    # See Rake::Pipeline::Step.step_def.
    def step_def(*args)
      Rake::Pipeline::Step.step_def(*args)
    end

    # Opens the first prerequisite of task +t+ for reading and yields it.
    def infile(t, &block)
      File.open(t.prerequisites.first) do |f|
        block.call(f)
      end
    end

    # Opens the target file of task +t+ for writing and yields it.
    def outfile(t, &block)
      File.open(t.name, 'w') do |f|
        block.call(f)
      end
    end

    # Loads what the previous step saved for task +t+: the unmarshaled
    # object when the file holds a Marshal dump, its text otherwise.
    # Returns nil when +t+ has no prerequisites.
    def load_input(t)
      return nil if t.prerequisites.first.nil?

      infile(t) do |f|
        # Text with non-ASCII characters also looks "binary", so additionally
        # require a Marshal header before unmarshaling.
        if is_binary?(f) && marshaled?(f)
          Marshal.load(f)
        else
          f.read
        end
      end
    end

    # Saves +output+ as the target of task +t+: nothing for nil, the text
    # itself for Strings, a Marshal dump for anything else.
    def save_output(t, output)
      case output
      when nil
        nil
      when String
        outfile(t) { |f| f.write output }
      else
        outfile(t) { |f| f.write Marshal.dump(output) }
      end
    end

    # We cannot load the input variable before the block.call, so we need
    # another method.

    # Load the input data from the previous step
    def input
      load_input(@@current_task) if @@current_task
    end

    # Add values to the info file of the current task and return the
    # merged Hash.
    def info(values = {})
      merged = Rake::Pipeline::Info.load_info(@@current_task).merge(values)
      Rake::Pipeline::Info.save_info(@@current_task, merged)
      merged
    end

    # Define a new step, it depends on the previously defined by default. It
    # saves the output of the block so it can be loaded by the input method of
    # the next step
    def step(name, dependencies = nil, &block)
      rule(step_def(name, dependencies)) do |t|
        # Save the task object to be able to load the input
        @@current_task = t

        save_output(t, block.call(t))
      end
    end

    private

    # True when the IO +file+ starts with a Marshal header this Ruby can
    # load (major version byte, then a minor version not newer than ours).
    # Rewinds +file+ afterwards.
    def marshaled?(file)
      header = file.read(2)
      file.rewind
      !header.nil? && header.bytesize == 2 &&
        header.getbyte(0) == Marshal::MAJOR_VERSION &&
        header.getbyte(1) <= Marshal::MINOR_VERSION
    end
  end
end
|
179
|
-
|
180
|
-
# Debug aid: when this file is run directly, print the info file path
# derived from a sample step file.
p Rake::Pipeline::Info.info_file('work/diseases/t') if $0 == __FILE__
|
data/lib/rbbt/util/simpleDSL.rb
DELETED
@@ -1,87 +0,0 @@
|
|
1
|
-
require 'parse_tree_extensions'
|
2
|
-
require 'parse_tree'
|
3
|
-
require 'ruby2ruby'
|
4
|
-
|
5
|
-
# This class helps designing DSL in ruby based on method_missing. Class
|
6
|
-
# is initialized with a block of code or a file with the code, and it is
|
7
|
-
# given a method to be invoked instead of method missing. This class
|
8
|
-
# deals simply with making the method_missing alias and removing it and
|
9
|
-
# executing the block or file with code.
|
10
|
-
class SimpleDSL

  # Raised by +initialize+ when the DSL file it was given does not exist.
  class ConfigFileMissingError < StandardError; end

  private

  # Redirects method_missing on this object's singleton class to +method+
  # (default :DSL_action). The previous method_missing is kept under the
  # alias "restore_DSL_<method>" and pushed on a stack for unhook_method.
  def hook_method(method = nil)
    method ||= :DSL_action
    @@restore_name = ("restore_DSL_" + method.to_s).to_sym
    @@method_name = method.to_sym

    class << self
      @restore_stack ||= []
      @restore_stack << @@restore_name
      alias_method(@@restore_name, :method_missing)
      alias_method(:method_missing, @@method_name)
    end
  end

  # Restores the method_missing saved by the latest hook_method call.
  def unhook_method
    class << self
      alias_method(:method_missing, @restore_stack.pop)
    end
  end

  public

  # Evaluates DSL code with method_missing redirected to +method+.
  #
  # +actions+ (or +block+ when +actions+ is nil) is either a Proc, which is
  # instance_eval'ed, or the path of a file whose content is eval'ed. The
  # source of the code is stored so +config+ can return it. The original
  # method_missing is restored even if the DSL code raises.
  def parse(method = nil, actions = nil, &block)

    actions ||= block

    hook_method(method)

    # No extra local variables are introduced here on purpose: file code is
    # eval'ed in this method's binding, where a local would shadow a DSL
    # word of the same name.
    begin
      if actions.is_a? Proc
        # Proc#to_ruby is expected to come from the libraries required at
        # the top of the file; the first and last lines of its result
        # (the proc wrapper) are dropped.
        @config[@@method_name] = actions.to_ruby.lines.to_a[1..-2].join

        instance_eval(&actions)
      elsif File.exist?(actions)
        @config[@@method_name] = File.read(actions)

        # NOTE(review): eval of file content; only use with trusted files.
        eval @config[@@method_name]
      end
    ensure
      unhook_method
    end
  end

  # Processes a DSL. +method+ is the name of the method executed instead
  # of method_missing. The code to be evaluated as a DSL is either
  # specified in +&block+ or in the file pointed by +file+.
  #
  # Raises ConfigFileMissingError when +file+ is given but does not exist.
  def initialize(method = nil, file = nil, &block)
    @config = {}
    if file
      raise ConfigFileMissingError, "File '#{ file }' is missing. Have you installed the config files? (use rbbt_config)." unless File.exist? file
      parse(method, file)
    end

    if block
      parse(method, block)
    end
  end

  # Returns the code with the DSL that was executed for +action+ (default
  # :DSL_action). If it came from a block it was turned to string using
  # ruby2ruby.
  def config(action = nil)
    if action
      @config[action.to_sym]
    else
      @config[:DSL_action]
    end
  end
end
|
86
|
-
|
87
|
-
|
data/lib/rbbt/util/tmpfile.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
require 'rbbt'
|
3
|
-
|
4
|
-
|
5
|
-
module TmpFile

  # Creates a random file name: the prefix +s+ followed by a random number
  # below +max+. A new String is returned; +s+ itself is not modified, so
  # the same (possibly frozen) prefix can be reused across calls.
  def self.random_name(s = "", max = 10000000)
    "#{ s }#{ rand(max) }"
  end

  # Creates a random filename (see random_name) inside Rbbt.tmpdir.
  def self.tmp_file(s = "", max = 10000000)
    File.join(Rbbt.tmpdir, random_name(s, max))
  end
end
|
data/tasks/install.rake
DELETED
@@ -1,124 +0,0 @@
|
|
1
|
-
require 'rbbt'
|
2
|
-
|
3
|
-
require 'shellwords'

$datadir = Rbbt.datadir
$scriptdir = File.join(Rbbt.rootdir, '/install_scripts')

# Runs the install script +script+ (a file name inside $scriptdir) from
# within +directory+, unless +marker+ already exists there. Setting $force
# reinstalls regardless. The directory is created if needed and its
# (non-hidden) content is removed before the script runs.
def rbbt_install_from_script(directory, marker, script)
  return unless $force || !File.exist?(File.join(directory, marker))

  FileUtils.mkdir_p directory
  # Dir.chdir raises if the directory cannot be entered, so the cleanup
  # can never run in some other working directory.
  Dir.chdir(directory) do
    FileUtils.rm_rf Dir.glob('*')
    `#{ Shellwords.escape(File.join($scriptdir, script)) }`
  end
end

# Creates $datadir/<name>, copies +files+ into it from $scriptdir/<name>
# and creates the empty sub-directories +subdirs+.
def rbbt_setup_workdir(name, files, subdirs)
  directory = "#{$datadir}/#{ name }"
  FileUtils.mkdir_p directory
  files.each do |f|
    FileUtils.cp_r File.join($scriptdir, "#{ name }/#{ f }"), directory
  end
  subdirs.each do |d|
    FileUtils.mkdir_p File.join(directory, d)
  end
end

task 'abner' do
  rbbt_install_from_script("#{$datadir}/third_party/abner/", 'abner.jar', 'get_abner.sh')
end

task 'banner' do
  rbbt_install_from_script("#{$datadir}/third_party/banner/", 'banner.jar', 'get_banner.sh')
end

task 'crf++' do
  rbbt_install_from_script("#{$datadir}/third_party/crf++/", 'ruby/CRFPP.so', 'get_crf++.sh')
end

task 'wordlists' do
  FileUtils.cp_r File.join($scriptdir, 'wordlists/'), $datadir
end

task 'polysearch' do
  rbbt_install_from_script("#{$datadir}/dbs/polysearch/", 'disease.txt', 'get_polysearch.sh')
end

task '3party' => %w(abner banner crf++)

task 'entrez' do
  rbbt_install_from_script("#{$datadir}/dbs/entrez/", 'gene_info', 'get_entrez.sh')
end

task 'go' do
  rbbt_install_from_script("#{$datadir}/dbs/go/", 'gene_ontology.obo', 'get_go.sh')
end

task 'biocreative' do
  rbbt_install_from_script("#{$datadir}/biocreative/", 'BC2GN', 'get_biocreative.sh')
end

task 'datasets' => %w(entrez biocreative)

# Installs the shared organism Rakefile and include file, one directory with
# its own Rakefile per "<organism>.Rakefile" script, then runs "rake names".
task 'organisms' do
  directory = "#{$datadir}/organisms"
  FileUtils.mkdir_p directory
  %w(Rakefile rake-include.rb).each do |f|
    FileUtils.cp_r File.join($scriptdir, "organisms/#{ f }"), directory
  end
  Dir.glob(File.join($scriptdir, "organisms/*.Rakefile")).each do |f|
    # Strip the literal ".Rakefile" extension to get the organism name.
    org = File.basename(f, '.Rakefile')
    FileUtils.mkdir_p File.join(directory, org)
    FileUtils.cp f, File.join(directory, "#{ org }/Rakefile")
  end
  Dir.chdir(directory) { `rake names` }
end

task 'ner' do
  rbbt_setup_workdir('ner', %w(Rakefile config), %w(data model results))
end

task 'norm' do
  rbbt_setup_workdir('norm', %w(Rakefile config functions.sh), %w(results models))
end

task 'classifier' do
  rbbt_setup_workdir('classifier', %w(Rakefile R), %w(data model results))
end
|
124
|
-
|