rbbt 1.2.5 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +69 -214
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -245
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -140
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -86
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Ath.Rakefile +0 -55
  22. data/install_scripts/organisms/Cal.Rakefile +0 -84
  23. data/install_scripts/organisms/Cel.Rakefile +0 -109
  24. data/install_scripts/organisms/Hsa.Rakefile +0 -140
  25. data/install_scripts/organisms/Mmu.Rakefile +0 -77
  26. data/install_scripts/organisms/Rakefile +0 -43
  27. data/install_scripts/organisms/Rno.Rakefile +0 -88
  28. data/install_scripts/organisms/Sce.Rakefile +0 -66
  29. data/install_scripts/organisms/Spo.Rakefile +0 -40
  30. data/install_scripts/organisms/rake-include.rb +0 -252
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -83
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -85
  49. data/lib/rbbt/sources/gscholar.rb +0 -74
  50. data/lib/rbbt/sources/organism.rb +0 -241
  51. data/lib/rbbt/sources/polysearch.rb +0 -117
  52. data/lib/rbbt/sources/pubmed.rb +0 -248
  53. data/lib/rbbt/util/arrayHash.rb +0 -266
  54. data/lib/rbbt/util/filecache.rb +0 -72
  55. data/lib/rbbt/util/index.rb +0 -47
  56. data/lib/rbbt/util/misc.rb +0 -106
  57. data/lib/rbbt/util/open.rb +0 -251
  58. data/lib/rbbt/util/rake.rb +0 -183
  59. data/lib/rbbt/util/simpleDSL.rb +0 -87
  60. data/lib/rbbt/util/tmpfile.rb +0 -35
  61. data/tasks/install.rake +0 -124
  62. data/test/rbbt/bow/test_bow.rb +0 -33
  63. data/test/rbbt/bow/test_classifier.rb +0 -72
  64. data/test/rbbt/bow/test_dictionary.rb +0 -91
  65. data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
  66. data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
  67. data/test/rbbt/ner/test_abner.rb +0 -17
  68. data/test/rbbt/ner/test_banner.rb +0 -17
  69. data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
  70. data/test/rbbt/ner/test_regexpNER.rb +0 -33
  71. data/test/rbbt/ner/test_rner.rb +0 -126
  72. data/test/rbbt/ner/test_rnorm.rb +0 -47
  73. data/test/rbbt/sources/test_biocreative.rb +0 -38
  74. data/test/rbbt/sources/test_biomart.rb +0 -31
  75. data/test/rbbt/sources/test_entrez.rb +0 -49
  76. data/test/rbbt/sources/test_go.rb +0 -24
  77. data/test/rbbt/sources/test_organism.rb +0 -59
  78. data/test/rbbt/sources/test_polysearch.rb +0 -27
  79. data/test/rbbt/sources/test_pubmed.rb +0 -39
  80. data/test/rbbt/util/test_arrayHash.rb +0 -257
  81. data/test/rbbt/util/test_filecache.rb +0 -37
  82. data/test/rbbt/util/test_index.rb +0 -31
  83. data/test/rbbt/util/test_misc.rb +0 -20
  84. data/test/rbbt/util/test_open.rb +0 -110
  85. data/test/rbbt/util/test_simpleDSL.rb +0 -57
  86. data/test/rbbt/util/test_tmpfile.rb +0 -21
  87. data/test/test_helper.rb +0 -4
  88. data/test/test_rbbt.rb +0 -11
@@ -1,251 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/tmpfile'
3
-
4
-
5
- # Provides a few helper functions to read and write files, as well as
6
- # for accessing remote files. It supports caching the files.
7
- module Open
8
-
9
# Returns a Proc to use in the :select parameter of the Open.to_hash method.
# The Proc answers whether the chosen field of a line is one of +entities+.
#
# Options:
# * :field => position of the field to compare, by default the first (0).
# * :sep => string or regexp used to split the line, by default "\t".
#
# The line terminator is removed before splitting. Open.to_hash hands the
# raw line (newline included) to the :select Proc, so without the chomp the
# last field of every row could never match.
def self.func_match_field(entities, options = {})
  field, sep = { :field => 0, :sep => "\t" }.merge(options).values_at(:field, :sep)

  Proc.new { |line| entities.include?(line.chomp.split(sep)[field]) }
end
18
-
19
# Splits +line+ into its fields, keeping empty fields (also trailing ones),
# so "a\tb\t" yields ["a", "b", ""]. An empty line yields [""].
#
# +sep+ may be a String, matched literally, or a Regexp. +line+ is left
# untouched, so frozen strings are accepted.
def self.fields(line, sep = "\t")
  # A String separator is escaped so characters such as "|" or "." are not
  # interpreted as regexp syntax.
  pattern = sep.is_a?(Regexp) ? sep : Regexp.new(Regexp.escape(sep.to_s))

  # The negative limit keeps trailing empty fields.
  chunks = line.split(pattern, -1)
  chunks.empty? ? [""] : chunks
end
29
-
30
- class DirectoryNotFoundError < StandardError; end
31
- class OpenURLError < StandardError; end
32
-
33
- private
34
-
35
# Directory holding the cached copies of remote files. It is created when the
# module is loaded; mkdir_p also creates missing parent directories and does
# nothing when the directory is already there.
@@remote_cachedir = File.join(Rbbt.cachedir, 'open-remote/')
FileUtils.mkdir_p @@remote_cachedir
37
-
38
# Reads from or writes to the cache of remote files. Entries are stored in
# the remote cache directory under the MD5 hex digest of +url+.
#
# With +data+ the content is saved under +url+ and nil is returned. Without
# +data+ the cached content for +url+ is returned, or nil when there is no
# entry for it.
def self.cache(url, data = nil)
  require 'digest/md5'
  entry = File.join(@@remote_cachedir, Digest::MD5.hexdigest(url))

  if data
    Open.write(entry, data)
    return nil
  end

  return nil unless File.exist?(entry)

  File.open(entry) { |file| file.read }
end
59
-
60
# Checks if +url+ is a remote file, that is, a String starting with one of
# the http, https, ssh or ftp schemes. Returns a truthy value (the match
# position) for remote urls and nil otherwise.
#
# Anything that is not a String (an IO, for instance) is never remote; it is
# rejected up front instead of being matched against the regexp.
def self.remote(url)
  return nil unless url.is_a?(String)

  # \A anchors at the very start of the string; ^ would also match after any
  # embedded newline.
  url =~ /\A(?:http|ssh|https|ftp):\/\//
end
64
-
65
-
66
# Checks if +url+ is a gzip file. Local paths have to end in ".gz"; remote
# urls may also carry a query string after the ".gz".
def self.gziped(url)
  return url =~ /\.gz$/ unless remote(url)

  url =~ /\.gz$/ || url =~ /\.gz\?.*$/
end
74
-
75
-
76
- @@last_time = Time.now
77
# Sleeps for whatever is left of +lag+ seconds since the previous call, if
# anything, and then records the current time for the next call.
def self.wait(lag = 0)
  now = Time.now
  ready_at = @@last_time + lag

  sleep(ready_at - now) if now < ready_at

  @@last_time = Time.now
end
86
-
87
- public
88
# Reads the content specified by +url+ and returns it as a String.
#
# * An IO object is simply read.
# * A local path is opened and read.
# * A remote url is looked up in the cache first; when it is not there it is
#   downloaded with wget into a temporary file, gunzipped if the url looks
#   like a gzip file, saved in the cache and returned.
#
# Raises OpenURLError when the download fails.
#
# Options:
# * :quiet => Do not print the progress of downloads
# * :nocache => do not use the cache.
# * :nice => seconds to wait between online queries
#
def self.read(url, options = {})
  # IO objects are handled first so they are never matched against a regexp.
  return url.read if IO === url

  return File.open(url) { |file| file.read } unless remote(url)

  unless options[:nocache]
    cached = cache(url)
    return cached if cached
  end

  wait(options[:nice]) if options[:nice]
  tmp = TmpFile.tmp_file("open-")

  begin
    # Argument-vector form of system: neither the url nor the temporary path
    # goes through a shell, so quotes or metacharacters in them are harmless.
    command = ['wget', '--user-agent=firefox', '-O', tmp, url]
    command << '-q' if options[:quiet]
    system(*command)

    raise OpenURLError, "Error reading remote url: #{ url }" unless $?.success?

    if gziped(url)
      FileUtils.mv(tmp, "#{tmp}.gz")
      system('gunzip', "#{tmp}.gz")
    end

    data = File.open(tmp) { |file| file.read }
    cache(url, data) unless options[:nocache]
    data
  ensure
    # Remove the temporary files on success and on failure alike.
    FileUtils.rm_f(tmp)
    FileUtils.rm_f("#{tmp}.gz")
  end
end
130
-
131
# Saves +content+ in the file +filename+, replacing whatever it contained.
# Returns nil.
#
# Raises Open::DirectoryNotFoundError when the directory of +filename+ does
# not exist, unless :force is given.
#
# Options:
# * :force => Create directories if missing.
def self.write(filename, content, options = {})
  dir = File.dirname(filename)

  unless File.exist?(dir)
    unless options[:force]
      raise Open::DirectoryNotFoundError, "Directory #{File.dirname(filename)} was not found"
    end

    FileUtils.makedirs(dir)
  end

  File.open(filename, 'w') { |f| f.write content }

  nil
end
150
-
151
# Writes +content+ at the end of the file +filename+, creating the file if it
# is not present. Returns nil.
#
# Raises Open::DirectoryNotFoundError when the directory of +filename+ does
# not exist, unless :force is given.
#
# Options:
# * :force => Create directories if missing.
def self.append(filename, content, options = {})
  dir = File.dirname(filename)

  unless File.exist?(dir)
    if options[:force]
      FileUtils.makedirs(dir)
    else
      raise Open::DirectoryNotFoundError, "Directory #{dir} was not found"
    end
  end

  # Block form: the file is closed even if the write raises.
  File.open(filename, 'a') { |f| f.write content }

  nil
end
171
-
172
-
173
-
174
# Reads a file with rows of elements separated by a given pattern and builds
# a hash with it. The keys of the hash are the elements in the :native
# position, by default the first (0). The value for each key is an array
# with one position for each of the positions specified in :extra, by default
# all but the :native. Since the native key may be repeated, each of the
# positions of the value is in itself an array. Rows with an empty or missing
# native element are skipped.
#
# +input+ is a StringIO, or anything accepted by Open.read.
#
# Options:
# * :native => position of the elements that will constitute the keys. By default 0.
# * :extra => positions of the rest of elements. By default all but :native. It can be an array of positions or a single position.
# * :sep => pattern to use in splitting the lines into elements, by default "\t"
# * :sep2 => pattern to use in splitting the elements into subelements, by default "|"
# * :flatten => flatten the array of arrays that hold the values for each key into a simple array. By default the value of :single.
# * :single => for each key select only the first of the values, instead of the complete array.
# * :fix => A Proc that is called to pre-process the line
# * :exclude => A Proc that is called to check if the line must be excluded from the process.
# * :select => A Proc that is called to check if the line must be selected to process.
def self.to_hash(input, options = {})
  native  = options[:native] || 0
  extra   = options[:extra]
  exclude = options[:exclude]
  select  = options[:select]
  fix     = options[:fix]
  sep     = options[:sep] || "\t"
  sep2    = options[:sep2] || "|"
  single  = options[:single]
  single  = false if single.nil?
  flatten = options[:flatten]
  flatten = single if flatten.nil?

  extra = [extra] if extra && !extra.is_a?(Array)

  content = StringIO === input ? input : Open.read(input)

  data = {}
  content.each_line do |l|
    l = fix.call(l) if fix
    next if exclude && exclude.call(l)
    next if select && !select.call(l)

    row_fields = self.fields(l.chomp, sep)
    id = row_fields[native]
    next if id.nil? || id == ""

    data[id] ||= []

    if extra
      row_fields = row_fields.values_at(*extra)
    else
      row_fields.delete_at(native)
    end

    if flatten
      data[id].concat(row_fields.compact.collect { |v| v.split(sep2) }.flatten)
    else
      row_fields.each_with_index do |value, i|
        next if value.nil?
        data[id][i] ||= []
        data[id][i] += value.split(sep2)
      end
    end
  end

  if single
    # Built pair by pair: flattening the pairs into one argument list would
    # break the key/value alignment when the first value is itself an array
    # (:single without :flatten).
    firsts = {}
    data.each { |key, values| firsts[key] = values.first }
    data = firsts
  end

  data
end
250
-
251
- end
@@ -1,183 +0,0 @@
1
- require 'rake'
2
-
3
# Include the step_def and step methods to simplify Pipelines. Steps depend on
# the step strictly above by default. The output of the step is saved
# marshaled, except for Strings which are saved as text. The input of the
# step, the output of the previous step if available, is accessed with the
# input method.
#
# Example::
#
#  step :text do
#    "Text to revert"
#  end
#
#  step :revert do
#    text = input
#    text.reverse
#  end
#
module Rake::Pipeline

  # Builds the rule definitions for the steps and remembers their
  # descriptions and the last step defined.
  module Rake::Pipeline::Step

    class << self

      @@step_descriptions = {}

      # Hash from the rule regexp of each described step to "step: message".
      def step_descriptions
        @@step_descriptions
      end

      # Registers +message+ as the description of +step+ under the regexp +re+.
      def add_description(re, step, message)
        @@step_descriptions[re] = "#{ step }: #{ message }"
      end

      @@last_step = nil

      # Returns the argument for Rake's +rule+ that defines the step +name+.
      #
      # +dependencies+ may be nil (depend on the previously defined step, if
      # any), a String or Symbol naming another step, an Array of such names,
      # or a Proc receiving the target filename. When there is nothing to
      # depend on, the bare regexp is returned; otherwise a Hash from the
      # regexp to a Proc computing the prerequisites.
      def step_def(name, dependencies = nil)

        # The step name is matched literally as a path component.
        re = /(?:^|\/)#{Regexp.escape(name.to_s)}\/.*$/

        # Take the last_description and associate it with the name
        if Rake.application.last_description
          add_description(re, name, Rake.application.last_description)
        end

        if dependencies.nil? && ! @@last_step.nil?
          dependencies = @@last_step
        end
        @@last_step = name

        # Generate the Hash definition
        case dependencies
        when nil
          re
        when String, Symbol
          {re => lambda{|filename| filename.sub(name.to_s, dependencies.to_s) }}
        when Array
          {re => lambda{|filename| dependencies.collect{|dep| filename.sub(name.to_s, dep.to_s) } }}
        when Proc
          {re => dependencies}
        end

      end

    end
  end

  # Keeps a YAML file with extra information next to the files of the steps.
  module Rake::Pipeline::Info

    # Loaded here because this file only requires 'rake'.
    require 'yaml'

    # Path of the info file for +filename+: the step directory is replaced
    # by '.info' and '.yaml' is appended, so 'work/diseases/t' becomes
    # 'work/.info/t.yaml'.
    def self.info_file(filename)
      filename.sub(/^(.*?)(?:[^\/]*)\/([^\/]*)$/, '\1.info/\2.yaml')
    end

    # Loads the info saved for the task +t+, or an empty Hash if there is
    # no info file for it.
    def self.load_info(t)
      info_filename = info_file(t.name)

      return {} unless File.exist?(info_filename)

      File.open(info_filename) { |file| YAML.load(file) }
    end

    # Saves +info+ as the info of the task +t+, creating the directory of
    # the info file if needed.
    def self.save_info(t, info = {})
      info_filename = info_file(t.name)

      FileUtils.mkdir_p File.dirname(info_filename)
      File.open(info_filename, 'w') { |file| file.write YAML.dump(info) }
    end

  end

  # Task being executed by +step+; nil until the first step runs.
  @@current_task = nil

  NON_ASCII_PRINTABLE = /[^\x20-\x7e\s]/

  # Checks the first 1024 bytes of +file+ for characters other than
  # printable ASCII and whitespace, and rewinds the file. Returns the
  # position of the first such character, or nil.
  def is_binary?(file)
    binary = file.read(1024) =~ NON_ASCII_PRINTABLE
    file.rewind
    binary
  end

  def step_descriptions
    Rake::Pipeline::Step.step_descriptions
  end

  def step_def(*args)
    Rake::Pipeline::Step.step_def(*args)
  end

  # Calls the block with the first prerequisite of +t+ open for reading.
  def infile(t, &block)
    File.open(t.prerequisites.first) do |f|
      block.call(f)
    end
  end

  # Calls the block with the target file of +t+ open for writing.
  def outfile(t, &block)
    File.open(t.name, 'w') do |f|
      block.call(f)
    end
  end

  # Loads the content of the first prerequisite of +t+: unmarshaled if the
  # file looks binary, as text otherwise. Returns nil if there are no
  # prerequisites.
  # NOTE(review): text containing non-ASCII characters also looks binary to
  # is_binary? and would be handed to Marshal.load -- confirm whether steps
  # may produce such text.
  def load_input(t)
    return nil if t.prerequisites.first.nil?
    infile(t){|f|
      if is_binary?(f)
        Marshal.load(f)
      else
        f.read
      end
    }
  end

  # Saves +output+ in the target file of +t+: Strings as text, anything else
  # marshaled. Nothing is written for nil.
  def save_output(t, output)
    case output
    when nil
      nil
    when String
      outfile(t){|f| f.write output }
    else
      outfile(t){|f| f.write Marshal.dump(output) }
    end
  end

  # We cannot load the input variable before the block.call, so we need another method

  # Load the input data from the previous step. Returns nil when no step is
  # being executed.
  def input
    load_input(@@current_task) if @@current_task
  end

  # Add values to the info file of the current step and return the merged info
  def info(values = {})
    info = Rake::Pipeline::Info.load_info(@@current_task)
    info = info.merge values
    Rake::Pipeline::Info.save_info(@@current_task, info)
    info
  end

  # Define a new step, it depends on the previously defined by default. It
  # saves the output of the block so it can be loaded by the input method of
  # the next step
  def step(name, dependencies = nil, &block)
    rule step_def(name, dependencies) do |t|

      # Save the task object to be able to load the input
      @@current_task = t

      output = block.call(t)

      save_output(t, output)
    end

  end
end
179
-
180
- if __FILE__ == $0
181
-
182
- p Rake::Pipeline::Info.info_file('work/diseases/t')
183
- end
@@ -1,87 +0,0 @@
1
- require 'parse_tree_extensions'
2
- require 'parse_tree'
3
- require 'ruby2ruby'
4
-
5
# This class helps designing DSLs in ruby based on method_missing. The class
# is initialized with a block of code or a file with the code, and it is
# given the name of a method to be invoked instead of method_missing. This
# class deals simply with making the method_missing alias and removing it,
# and with executing the block or the file with the code.
class SimpleDSL

  class ConfigFileMissingError < StandardError; end

  private

  # Makes +method+ (by default :DSL_action) answer to method_missing on this
  # object. The previous method_missing is kept under an alias whose name is
  # pushed on a stack held by the singleton class, so unhook_method can
  # restore it.
  def hook_method(method = nil)
    method ||= :DSL_action
    # Class variables, since locals are not visible inside 'class << self'.
    @@restore_name = ("restore_DSL_" + method.to_s).to_sym
    @@method_name = method.to_sym

    class << self
      @restore_stack ||= []
      @restore_stack << @@restore_name
      alias_method(@@restore_name, :method_missing)
      alias_method(:method_missing, @@method_name)
    end
  end

  # Restores the method_missing saved by the last hook_method.
  def unhook_method
    class << self
      alias_method(:method_missing, @restore_stack.pop)
    end
  end

  public

  # Executes the DSL code with +method+ handling the missing methods. The
  # code is +actions+, either a Proc or the path of a file, or else the
  # block. The source of the code is kept and made available through config.
  def parse(method = nil, actions = nil, &block)

    actions ||= block

    hook_method(method)

    begin
      # Execute
      if actions.is_a? Proc

        # Drop the first and last lines of the generated source, which wrap
        # the body of the block.
        @config[@@method_name] = actions.to_ruby.lines.to_a[1..-2].join

        instance_eval(&actions)
      elsif File.exist?(actions)

        @config[@@method_name] = File.read(actions)

        # No extra local variable is introduced here: the evaluated code sees
        # the locals of this method.
        eval @config[@@method_name]
      end
    ensure
      # Restore method_missing even when the DSL code raises.
      unhook_method
    end

  end


  # Processes a DSL. +method+ is the name of the method executed instead
  # of method_missing. The code to be evaluated as a DSL is either
  # specified in +&block+ or in the file pointed by +file+.
  #
  # Raises ConfigFileMissingError if +file+ is given and does not exist.
  def initialize(method = nil, file = nil, &block)
    @config = {}
    if file
      unless File.exist? file
        raise ConfigFileMissingError, "File '#{ file }' is missing. Have you installed the config files? (use rbbt_config)."
      end
      parse(method, file)
    end

    if block
      parse(method, block)
    end
  end

  # Returns the code with the DSL that was executed. If it came from a
  # block it was turned to string using ruby2ruby.
  def config(action = nil)
    if action
      @config[action.to_sym]
    else
      @config[:DSL_action]
    end
  end
end
86
-
87
-