rbbt 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +17 -0
- data/bin/rbbt_config +180 -0
- data/install_scripts/classifier/R/classify.R +36 -0
- data/install_scripts/classifier/Rakefile +140 -0
- data/install_scripts/get_abner.sh +2 -0
- data/install_scripts/get_banner.sh +25 -0
- data/install_scripts/get_biocreative.sh +72 -0
- data/install_scripts/get_crf++.sh +26 -0
- data/install_scripts/get_entrez.sh +4 -0
- data/install_scripts/get_go.sh +4 -0
- data/install_scripts/get_polysearch.sh +8 -0
- data/install_scripts/ner/Rakefile +206 -0
- data/install_scripts/ner/config/default.rb +52 -0
- data/install_scripts/norm/Rakefile +218 -0
- data/install_scripts/norm/config/cue_default.rb +10 -0
- data/install_scripts/norm/config/tokens_default.rb +79 -0
- data/install_scripts/norm/functions.sh +21 -0
- data/install_scripts/organisms/Rakefile +25 -0
- data/install_scripts/organisms/cgd.Rakefile +84 -0
- data/install_scripts/organisms/human.Rakefile +145 -0
- data/install_scripts/organisms/mgi.Rakefile +77 -0
- data/install_scripts/organisms/pombe.Rakefile +40 -0
- data/install_scripts/organisms/rake-include.rb +258 -0
- data/install_scripts/organisms/rgd.Rakefile +88 -0
- data/install_scripts/organisms/sgd.Rakefile +66 -0
- data/install_scripts/organisms/tair.Rakefile +54 -0
- data/install_scripts/organisms/worm.Rakefile +109 -0
- data/install_scripts/stopwords +1 -0
- data/install_scripts/wordlists/consonants +897 -0
- data/install_scripts/wordlists/stopwords +1 -0
- data/lib/rbbt/bow/bow.rb +87 -0
- data/lib/rbbt/bow/classifier.rb +118 -0
- data/lib/rbbt/bow/dictionary.rb +218 -0
- data/lib/rbbt/ner/abner.rb +34 -0
- data/lib/rbbt/ner/banner.rb +73 -0
- data/lib/rbbt/ner/regexpNER.rb +62 -0
- data/lib/rbbt/ner/rner.rb +227 -0
- data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
- data/lib/rbbt/ner/rnorm/tokens.rb +213 -0
- data/lib/rbbt/ner/rnorm.rb +142 -0
- data/lib/rbbt/sources/biocreative.rb +75 -0
- data/lib/rbbt/sources/biomart.rb +106 -0
- data/lib/rbbt/sources/entrez.rb +211 -0
- data/lib/rbbt/sources/go.rb +40 -0
- data/lib/rbbt/sources/organism.rb +197 -0
- data/lib/rbbt/sources/polysearch.rb +88 -0
- data/lib/rbbt/sources/pubmed.rb +111 -0
- data/lib/rbbt/util/arrayHash.rb +255 -0
- data/lib/rbbt/util/filecache.rb +72 -0
- data/lib/rbbt/util/index.rb +69 -0
- data/lib/rbbt/util/misc.rb +101 -0
- data/lib/rbbt/util/open.rb +207 -0
- data/lib/rbbt/util/simpleDSL.rb +87 -0
- data/lib/rbbt/util/tmpfile.rb +19 -0
- data/lib/rbbt/version.rb +10 -0
- data/lib/rbbt.rb +86 -0
- data/tasks/install.rake +123 -0
- metadata +114 -0
@@ -0,0 +1,207 @@
|
|
1
|
+
require 'rbbt/util/tmpfile'
|
2
|
+
require 'rbbt'
|
3
|
+
|
4
|
+
|
5
|
+
# Provides a few helper functions to read and write files, as well as
|
6
|
+
# for accessing remote files. It supports caching the files.
|
7
|
+
module Open

  # Raised by Open.write and Open.append when the target directory does not
  # exist and the :force option was not given.
  class DirectoryNotFoundError < StandardError; end

  # Directory holding the cached contents of remote urls, one file per url.
  @@remote_cachedir = File.join(Rbbt.cachedir, 'open-remote/')
  # mkdir_p creates missing parents and is a no-op when the directory
  # already exists.
  FileUtils.mkdir_p @@remote_cachedir

  # If no data is specified and the url is found in the cache the saved
  # contents are returned; if it is not found nil is returned. If +data+
  # is specified then it is saved in the cache under +url+ and nil is
  # returned. To match +url+ in the cache an MD5 digest is used. The
  # location of the cache directory is
  # File.join(Rbbt.cachedir, 'open-remote/').
  def self.cache(url, data = nil)
    require 'digest/md5'
    path = File.join(@@remote_cachedir, Digest::MD5.hexdigest(url))

    if data
      Open.write(path, data)
      nil
    elsif File.exist?(path)
      File.open(path){|file| file.read }
    end
  end

  # Checks if +url+ is a remote file (http, https, ftp or ssh). Returns a
  # true value for remote urls and nil otherwise. Anything that is not a
  # String is never remote.
  def self.remote(url)
    return nil unless url.is_a?(String)
    url =~ /\A(?:http|ssh|https|ftp):\/\//
  end

  # Checks if +url+ is a gzip file. Remote urls may carry a query string
  # after the .gz extension.
  def self.gziped(url)
    if remote(url)
      url =~ /\.gz$/ || url =~ /\.gz\?.*$/
    else
      url =~ /\.gz$/
    end
  end

  # Reads the file specified by url. If +url+ is an IO object its contents
  # are read. If the url is local the file is opened and returned as
  # stored. If it is remote the cache is checked first, otherwise the url
  # is downloaded with wget, and gzip files are unzipped with gunzip.
  #
  # Options:
  # * :quiet => Do not print the progress of downloads
  # * :nocache => do not use the cache.
  #
  def self.read(url, options = {})
    # Streams go first: they cannot be matched against the url pattern.
    return url.read if IO === url
    return File.open(url){|file| file.read } unless remote(url)

    unless options[:nocache]
      cached = cache(url)
      return cached if cached
    end

    tmp = TmpFile.tmp_file("open-")
    begin
      # Argument-list form: the url and the paths never go through a shell.
      wget = ['wget', '-O', tmp]
      wget << '-q' if options[:quiet]
      wget << url
      downloaded = system(*wget)

      if gziped(url)
        FileUtils.mv tmp, "#{tmp}.gz"
        system('gunzip', "#{tmp}.gz")
      end

      data = File.open(tmp){|file| file.read }
    ensure
      # Remove whatever is left behind, also when a step above failed.
      FileUtils.rm_f [tmp, "#{tmp}.gz"]
    end

    # Only successful downloads are remembered, so a failed one does not
    # poison the cache.
    cache(url, data) if downloaded && !options[:nocache]
    data
  end

  # Writes the contents on the path specified by filename. Returns nil.
  #
  # Options:
  # * :force => Create directories if missing.
  #
  # Raises Open::DirectoryNotFoundError if the directory is missing and
  # :force is not set.
  def self.write(filename, content, options = {})
    ensure_directory(filename, options)
    File.open(filename, 'w'){|f| f.write content }
    nil
  end

  # Writes the contents on the path specified by filename. If the file
  # is present it appends the contents. Returns nil.
  #
  # Options:
  # * :force => Create directories if missing.
  #
  # Raises Open::DirectoryNotFoundError if the directory is missing and
  # :force is not set.
  def self.append(filename, content, options = {})
    ensure_directory(filename, options)
    # Block form closes the file even if the write raises.
    File.open(filename, 'a'){|f| f.write content }
    nil
  end

  # Reads a file with rows with elements separated by a given pattern
  # and builds a hash with it. The keys of the hash are the elements in
  # the :native position, by default the first (0). The value for each
  # key is an array with one position for each of the positions
  # specified in :extra, by default all but the :native. Since the native
  # key may be repeated, each of the positions of the values is in itself
  # an array. Rows whose native element is missing or empty are skipped.
  #
  # Options:
  # * :native => position of the elements that will constitute the keys. By default 0.
  # * :extra => positions of the rest of elements. By default all but :native. It can be an array of positions or a single position.
  # * :sep => pattern to use in splitting the lines into elements, by default "\t"
  # * :flatten => flatten the array of arrays that hold the values for each key into a simple array, dropping empty values.
  # * :single => for each key select only the first of the values, instead of the complete array. Implies :flatten.
  # * :fix => A Proc that is called to pre-process the line
  # * :exclude => A Proc that is called to check if the line must be excluded from the process.
  def self.to_hash(filename, options = {})
    native  = options[:native] || 0
    extra   = options[:extra]
    exclude = options[:exclude]
    fix     = options[:fix]
    sep     = options[:sep] || "\t"
    single  = options[:single]
    flatten = options[:flatten] || single

    extra = [extra] if extra && !extra.is_a?(Array)
    pattern = /#{sep}/

    data = {}
    # each_line, because String#each is not available on Ruby >= 1.9.
    Open.read(filename).each_line{|l|
      l = fix.call(l) if fix
      next if exclude && exclude.call(l)

      parts = l.chomp.split(pattern)
      id = parts[native]
      next if id.nil? || id == ""

      data[id] ||= []
      fields = extra || ((0...parts.length).to_a - [native])
      fields.each_with_index{|pos, i|
        (data[id][i] ||= []) << parts[pos]
      }
    }

    if flatten
      data.each_value{|values|
        values.flatten!
        values.reject!{|v| v.nil? || v == "" }
      }
    end

    if single
      firsts = {}
      data.each{|key, values| firsts[key] = values.first }
      data = firsts
    end

    data
  end

  # Makes sure the directory of +filename+ exists: creates it when
  # options[:force] is set, raises Open::DirectoryNotFoundError otherwise.
  def self.ensure_directory(filename, options)
    dir = File.dirname(filename)
    return if File.exist?(dir)
    raise Open::DirectoryNotFoundError, "Directory #{dir} was not found" unless options[:force]
    FileUtils.makedirs(dir)
  end
  private_class_method :ensure_directory

end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'parse_tree_extensions'
|
2
|
+
require 'parse_tree'
|
3
|
+
require 'ruby2ruby'
|
4
|
+
|
5
|
+
# This class helps design DSLs in Ruby based on method_missing. The class
|
6
|
+
# is initialized with a block of code or a file with the code, and it is
|
7
|
+
# given a method to be invoked instead of method missing. This class
|
8
|
+
# deals simply with making the method_missing alias and removing it and
|
9
|
+
# executing the block or file with code.
|
10
|
+
class SimpleDSL

  # Raised when the file with the DSL code given to SimpleDSL.new does not
  # exist.
  class ConfigFileMissingError < StandardError; end

  private

  # Replaces method_missing on this object's singleton class with
  # +method+ (:DSL_action by default). The previous method_missing is kept
  # under the name restore_DSL_<method>, and that name is pushed on a
  # stack so that unhook_method can restore it.
  def hook_method(method = nil)
    method ||= :DSL_action
    @@restore_name = ("restore_DSL_" + method.to_s).to_sym
    @@method_name = method.to_sym

    class << self
      @restore_stack ||= []
      @restore_stack << @@restore_name
      alias_method(@@restore_name, :method_missing)
      alias_method(:method_missing, @@method_name)
    end
  end

  # Restores the method_missing saved by the last call to hook_method.
  def unhook_method
    class << self
      alias_method(:method_missing, @restore_stack.pop)
    end
  end

  public

  # Evaluates +actions+ (a Proc, the path of a file with code, or the
  # given block) with +method+ hooked in place of method_missing. The
  # source of the code is stored and can be retrieved through #config.
  def parse(method = nil, actions = nil, &block)
    actions ||= block

    hook_method(method)

    begin
      if actions.is_a? Proc
        # to_ruby comes from the parse_tree/ruby2ruby libraries; the first
        # and last elements are dropped, presumably the lines wrapping the
        # body of the proc -- TODO confirm.
        @config[@@method_name] = actions.to_ruby.collect[1..-2].join
        instance_eval(&actions)
      elsif File.exist?(actions)
        # Read once, closing the file, and evaluate what was stored.
        code = File.open(actions){|f| f.read }
        @config[@@method_name] = code
        eval code
      end
    ensure
      # Always restore method_missing, also when the DSL code raises.
      unhook_method
    end
  end

  # Processes a DSL. +method+ is the name of the method executed instead
  # of method_missing. The code to be evaluated as a DSL is either
  # specified in +&block+ or in the file pointed by +file+.
  #
  # Raises ConfigFileMissingError if +file+ is given but does not exist.
  def initialize(method = nil, file = nil, &block)
    @config = {}

    if file
      raise ConfigFileMissingError, "File '#{ file }' is missing. Have you installed the config files? (rbbt_config install norm)." unless File.exist? file
      parse(method, file)
    end

    parse(method, block) if block
  end

  # Returns the code with the DSL that was executed for +action+
  # (:DSL_action by default). If it came from a block it was turned to
  # string using ruby2ruby.
  def config(action = nil)
    if action
      @config[action.to_sym]
    else
      @config[:DSL_action]
    end
  end
end
|
86
|
+
|
87
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'rbbt'
|
3
|
+
|
4
|
+
|
5
|
+
module TmpFile

  # Creates a random file name: the prefix +s+ followed by a random
  # integer below +max+. A new String is returned; +s+ itself is not
  # modified, so frozen strings and literals shared by the caller are safe.
  def self.random_name(s = "", max = 10000000)
    "#{s}#{rand(max)}"
  end

  # Creates a random filename in the temporary directory (Rbbt.tmpdir),
  # using +s+ and +max+ as in random_name. The file itself is not created.
  def self.tmp_file(s = "", max = 10000000)
    File.join(Rbbt.tmpdir, random_name(s, max))
  end
end
|
data/lib/rbbt/version.rb
ADDED
data/lib/rbbt.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
require 'yaml'
|
6
|
+
|
7
|
+
|
8
|
+
# This module implements a number of utilities aimed at performing Text
|
9
|
+
# Mining of BioMedical data. It includes the following:
|
10
|
+
#
|
11
|
+
# * Multi-purpose Named Entity Recognition and Normalization. And training data for
|
12
|
+
# Gene Mention from the BioCreative competition.
|
13
|
+
# * Document Classification
|
14
|
+
# * Interfaces to Gene Ontology, Entrez Gene, BioMart and PubMed
|
15
|
+
#
|
16
|
+
# There are a number of classes to help gather and integrate the
|
17
|
+
# information from all the sources. It is designed to be very flexible,
|
18
|
+
# but with a sensible set of defaults.
|
19
|
+
#
|
20
|
+
module Rbbt

  # Raised when the data, cache or tmp directory is not configured.
  # Subclasses StandardError so that a plain +rescue+ can handle it.
  class NoConfig < StandardError; end

  # Root of the installation: the parent of the directory holding this file.
  @@rootdir = File.dirname(File.dirname(__FILE__))

  @@datadir = @@cachedir = @@tmpdir = nil

  # Reads the YAML file +file+, if it exists and holds a Hash, and takes
  # from it the 'datadir', 'cachedir' and 'tmpdir' entries that are set.
  def self.apply_config(file)
    return unless File.exist?(file)

    config = YAML.load_file(file)
    return unless config.is_a? Hash

    @@datadir  = config['datadir']  if config['datadir']
    @@cachedir = config['cachedir'] if config['cachedir']
    @@tmpdir   = config['tmpdir']   if config['tmpdir']
  end
  private_class_method :apply_config

  # Loads the configuration, first from rbbt.config in the root directory
  # and then from $HOME/.rbbt, whose values take precedence. Creates the
  # configured directories if missing and appends the banner and abner
  # jars to the CLASSPATH environment variable.
  #
  # Raises NoConfig if any of the three directories is left unset.
  def self.load_config
    config_files = [File.join(@@rootdir, 'rbbt.config')]
    # HOME may be unset (e.g. under some daemons); skip the user file then.
    config_files << File.join(ENV['HOME'], '.rbbt') if ENV['HOME']
    config_files.each{|file| apply_config(file) }

    if @@datadir.nil? || @@cachedir.nil? || @@tmpdir.nil?
      raise NoConfig, "rbbt not configured. Edit #{File.join(@@rootdir, 'rbbt.config')} or $HOME/.rbbt"
    end

    [@@datadir, @@cachedir, @@tmpdir].each{|dir|
      FileUtils.mkdir_p dir unless File.exist? dir
    }

    # For some reason banner.jar must be loaded before abner.jar
    ENV['CLASSPATH'] ||= ""
    ENV['CLASSPATH'] += ":" + %w(banner abner).collect{|pkg| File.join(datadir, "third_party/#{pkg}/#{ pkg }.jar")}.join(":")
  end

  # Root directory of the installation.
  def self.rootdir
    @@rootdir
  end

  # Configured data directory.
  def self.datadir
    @@datadir
  end

  # Configured cache directory.
  def self.cachedir
    @@cachedir
  end

  # Configured directory for temporary files.
  def self.tmpdir
    @@tmpdir
  end

  # The configuration is loaded as soon as the module is defined.
  self.load_config
end
|
85
|
+
|
86
|
+
|
data/tasks/install.rake
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
|
3
|
+
$datadir = Rbbt.datadir
$scriptdir = File.join(Rbbt.rootdir, '/install_scripts')

# Runs the install script +script+ (taken from $scriptdir) inside
# +directory+, after creating the directory and removing its previous
# contents. Nothing is done when the file +marker+ is already present in
# +directory+, unless $force is set.
#
# The directory is entered and cleaned from Ruby rather than through an
# interpolated shell command, so its path needs no shell quoting. The
# script's standard output is no longer captured and discarded.
def rbbt_install_with_script(directory, marker, script)
  return if File.exist?(File.join(directory, marker)) && !$force

  FileUtils.mkdir_p directory
  Dir.chdir(directory) do
    FileUtils.rm_rf Dir.glob('*')
    system(File.join($scriptdir, script))
  end
end

# Sets up the working directory $datadir/+name+: copies +files+ from
# $scriptdir/+name+ into it and creates the empty +subdirs+.
def rbbt_install_workdir(name, files, subdirs)
  directory = "#{$datadir}/#{name}"
  FileUtils.mkdir_p directory
  files.each{|f|
    FileUtils.cp_r File.join($scriptdir, "#{name}/#{ f }"), directory
  }
  subdirs.each{|d|
    FileUtils.mkdir_p File.join(directory, d)
  }
end

task 'abner' do
  rbbt_install_with_script "#{$datadir}/third_party/abner/", 'abner.jar', 'get_abner.sh'
end

task 'banner' do
  rbbt_install_with_script "#{$datadir}/third_party/banner/", 'banner.jar', 'get_banner.sh'
end

task 'crf++' do
  rbbt_install_with_script "#{$datadir}/third_party/crf++/", 'ruby/CRFPP.so', 'get_crf++.sh'
end

task 'wordlists' do
  FileUtils.cp_r File.join($scriptdir, 'wordlists/'), $datadir
end

task 'polysearch' do
  rbbt_install_with_script "#{$datadir}/dbs/polysearch/", 'disease.txt', 'get_polysearch.sh'
end

task '3party' => %w(abner banner crf++)

task 'entrez' do
  rbbt_install_with_script "#{$datadir}/dbs/entrez/", 'gene_info', 'get_entrez.sh'
end

task 'go' do
  rbbt_install_with_script "#{$datadir}/dbs/go/", 'gene_ontology.obo', 'get_go.sh'
end

task 'biocreative' do
  rbbt_install_with_script "#{$datadir}/biocreative/", 'BC2GN', 'get_biocreative.sh'
end

task 'datasets' => %w(entrez biocreative)

# Copies the shared Rakefile and rake-include.rb, and gives every organism
# its own directory holding its <organism>.Rakefile as Rakefile.
task 'organisms' do
  directory = "#{$datadir}/organisms"
  FileUtils.mkdir_p directory
  %w(Rakefile rake-include.rb).each{|f|
    FileUtils.cp_r File.join($scriptdir, "organisms/#{ f }"), directory
  }
  Dir.glob(File.join($scriptdir, "organisms/*.Rakefile")).each{|f|
    # Strip exactly the trailing extension to get the organism name.
    org = File.basename(f, '.Rakefile')
    FileUtils.mkdir_p File.join(directory, org)
    FileUtils.cp f, File.join(directory, "#{ org }/Rakefile")
  }
end

task 'ner' do
  rbbt_install_workdir 'ner', %w(Rakefile config), %w(data model results)
end

task 'norm' do
  rbbt_install_workdir 'norm', %w(Rakefile config), %w(results)
end

task 'classifier' do
  rbbt_install_workdir 'classifier', %w(Rakefile R), %w(data model results)
end
|
123
|
+
|
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rbbt
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Miguel Vazquez
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-29 00:00:00 +01:00
|
13
|
+
default_executable: rbbt_config
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: |-
|
17
|
+
This toolbox includes modules for text-mining, like Named Entity Recognition and Normalization and document
|
18
|
+
classification, as well as data integration modules that interface with PubMed, Entrez Gene, BioMart.
|
19
|
+
email: miguel.vazquez@fdi.ucm.es
|
20
|
+
executables:
|
21
|
+
- rbbt_config
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- LICENSE
|
26
|
+
- README.rdoc
|
27
|
+
files:
|
28
|
+
- install_scripts/classifier/R/classify.R
|
29
|
+
- install_scripts/classifier/Rakefile
|
30
|
+
- install_scripts/get_abner.sh
|
31
|
+
- install_scripts/get_banner.sh
|
32
|
+
- install_scripts/get_biocreative.sh
|
33
|
+
- install_scripts/get_crf++.sh
|
34
|
+
- install_scripts/get_entrez.sh
|
35
|
+
- install_scripts/get_go.sh
|
36
|
+
- install_scripts/get_polysearch.sh
|
37
|
+
- install_scripts/ner/Rakefile
|
38
|
+
- install_scripts/ner/config/default.rb
|
39
|
+
- install_scripts/norm/Rakefile
|
40
|
+
- install_scripts/norm/config/cue_default.rb
|
41
|
+
- install_scripts/norm/config/tokens_default.rb
|
42
|
+
- install_scripts/norm/functions.sh
|
43
|
+
- install_scripts/organisms/Rakefile
|
44
|
+
- install_scripts/organisms/cgd.Rakefile
|
45
|
+
- install_scripts/organisms/human.Rakefile
|
46
|
+
- install_scripts/organisms/mgi.Rakefile
|
47
|
+
- install_scripts/organisms/pombe.Rakefile
|
48
|
+
- install_scripts/organisms/rake-include.rb
|
49
|
+
- install_scripts/organisms/rgd.Rakefile
|
50
|
+
- install_scripts/organisms/sgd.Rakefile
|
51
|
+
- install_scripts/organisms/tair.Rakefile
|
52
|
+
- install_scripts/organisms/worm.Rakefile
|
53
|
+
- install_scripts/stopwords
|
54
|
+
- install_scripts/wordlists/consonants
|
55
|
+
- install_scripts/wordlists/stopwords
|
56
|
+
- lib/rbbt.rb
|
57
|
+
- lib/rbbt/bow/bow.rb
|
58
|
+
- lib/rbbt/bow/classifier.rb
|
59
|
+
- lib/rbbt/bow/dictionary.rb
|
60
|
+
- lib/rbbt/ner/abner.rb
|
61
|
+
- lib/rbbt/ner/banner.rb
|
62
|
+
- lib/rbbt/ner/regexpNER.rb
|
63
|
+
- lib/rbbt/ner/rner.rb
|
64
|
+
- lib/rbbt/ner/rnorm.rb
|
65
|
+
- lib/rbbt/ner/rnorm/cue_index.rb
|
66
|
+
- lib/rbbt/ner/rnorm/tokens.rb
|
67
|
+
- lib/rbbt/sources/biocreative.rb
|
68
|
+
- lib/rbbt/sources/biomart.rb
|
69
|
+
- lib/rbbt/sources/entrez.rb
|
70
|
+
- lib/rbbt/sources/go.rb
|
71
|
+
- lib/rbbt/sources/organism.rb
|
72
|
+
- lib/rbbt/sources/polysearch.rb
|
73
|
+
- lib/rbbt/sources/pubmed.rb
|
74
|
+
- lib/rbbt/util/arrayHash.rb
|
75
|
+
- lib/rbbt/util/filecache.rb
|
76
|
+
- lib/rbbt/util/index.rb
|
77
|
+
- lib/rbbt/util/misc.rb
|
78
|
+
- lib/rbbt/util/open.rb
|
79
|
+
- lib/rbbt/util/simpleDSL.rb
|
80
|
+
- lib/rbbt/util/tmpfile.rb
|
81
|
+
- lib/rbbt/version.rb
|
82
|
+
- tasks/install.rake
|
83
|
+
- LICENSE
|
84
|
+
- README.rdoc
|
85
|
+
has_rdoc: true
|
86
|
+
homepage: http://github.com/mikisvaz/rbbt
|
87
|
+
licenses: []
|
88
|
+
|
89
|
+
post_install_message:
|
90
|
+
rdoc_options:
|
91
|
+
- --charset=UTF-8
|
92
|
+
require_paths:
|
93
|
+
- lib
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: "0"
|
99
|
+
version:
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: "0"
|
105
|
+
version:
|
106
|
+
requirements: []
|
107
|
+
|
108
|
+
rubyforge_project:
|
109
|
+
rubygems_version: 1.3.5
|
110
|
+
signing_key:
|
111
|
+
specification_version: 3
|
112
|
+
summary: Bioinformatics and text mining toolbox
|
113
|
+
test_files: []
|
114
|
+
|