rbbt 1.1.7 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +72 -136
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -246
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -145
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -79
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Rakefile +0 -43
  22. data/install_scripts/organisms/cgd.Rakefile +0 -84
  23. data/install_scripts/organisms/human.Rakefile +0 -145
  24. data/install_scripts/organisms/mgi.Rakefile +0 -77
  25. data/install_scripts/organisms/pombe.Rakefile +0 -40
  26. data/install_scripts/organisms/rake-include.rb +0 -258
  27. data/install_scripts/organisms/rgd.Rakefile +0 -88
  28. data/install_scripts/organisms/sgd.Rakefile +0 -66
  29. data/install_scripts/organisms/tair.Rakefile +0 -54
  30. data/install_scripts/organisms/worm.Rakefile +0 -109
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -86
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -40
  49. data/lib/rbbt/sources/organism.rb +0 -245
  50. data/lib/rbbt/sources/polysearch.rb +0 -117
  51. data/lib/rbbt/sources/pubmed.rb +0 -111
  52. data/lib/rbbt/util/arrayHash.rb +0 -255
  53. data/lib/rbbt/util/filecache.rb +0 -72
  54. data/lib/rbbt/util/index.rb +0 -47
  55. data/lib/rbbt/util/misc.rb +0 -106
  56. data/lib/rbbt/util/open.rb +0 -235
  57. data/lib/rbbt/util/rake.rb +0 -183
  58. data/lib/rbbt/util/simpleDSL.rb +0 -87
  59. data/lib/rbbt/util/tmpfile.rb +0 -19
  60. data/tasks/install.rake +0 -124
@@ -1,183 +0,0 @@
1
- require 'rake'
2
-
3
- # Include the step_def and step methods to simplify Pipelines. Steps depend on
4
- # the step strictly above by default. The output of the step is saved marshaled,
5
- # except for Strings which are saved as text. The input of the step, the output
6
- # of the previous step, if available, is accessed with the input method
7
- #
8
- # Example::
9
- #
10
- # step :text do
11
- # "Text to revert"
12
- # end
13
- #
14
- # step :revert do
15
- # text = input
16
- # text.reverse
17
- # end
18
- #
19
- module Rake::Pipeline
20
-
21
- module Rake::Pipeline::Step
22
-
23
- class << self
24
-
25
- @@step_descriptions = {}
26
- def step_descriptions
27
- @@step_descriptions
28
- end
29
-
30
- def add_description(re, step, message)
31
- @@step_descriptions[re] = "#{ step }: #{ message }"
32
- end
33
-
34
- @@last_step = nil
35
- def step_def(name, dependencies = nil)
36
-
37
- re = Regexp.new(/(?:^|\/)#{name}\/.*$/)
38
-
39
- # Take the last_description and associate it with the name
40
- if Rake.application.last_description
41
- add_description(re, name, Rake.application.last_description)
42
- end
43
-
44
- if dependencies.nil? && ! @@last_step.nil?
45
- dependencies = @@last_step
46
- end
47
- @@last_step = name
48
-
49
- # Generate the Hash definition
50
- case
51
- when dependencies.nil?
52
- re
53
- when String === dependencies || Symbol === dependencies
54
- {re => lambda{|filename| filename.sub(name.to_s,dependencies.to_s) }}
55
- when Array === dependencies
56
- {re => lambda{|filename| dependencies.collect{|dep| filename.sub(name.to_s, dep.to_s) } }}
57
- when Proc === dependencies
58
- {re => dependencies}
59
- end
60
-
61
- end
62
-
63
- end
64
- end
65
-
66
- module Rake::Pipeline::Info
67
-
68
- def self.info_file(filename)
69
- filename.sub(/^(.*?)(?:[^\/]*)\/([^\/]*)$/, '\1.info/\2.yaml')
70
- end
71
-
72
- def self.load_info(t)
73
- filename = t.name
74
- info_filename = info_file(filename)
75
-
76
- if File.exists? info_filename
77
- YAML.load(File.open(info_filename))
78
- else
79
- {}
80
- end
81
- end
82
-
83
- def self.save_info(t, info = {})
84
- filename = t.name
85
- info_filename = info_file(filename)
86
-
87
- FileUtils.mkdir_p File.dirname(info_filename) unless File.exists? File.dirname(info_filename)
88
- File.open(info_filename,'w'){|file|
89
- file.write YAML.dump info
90
- }
91
- end
92
-
93
- end
94
-
95
-
96
- NON_ASCII_PRINTABLE = /[^\x20-\x7e\s]/
97
- def is_binary?(file)
98
- binary = file.read(1024) =~ NON_ASCII_PRINTABLE
99
- file.rewind
100
- binary
101
- end
102
-
103
- def step_descriptions
104
- Rake::Pipeline::Step.step_descriptions
105
- end
106
-
107
-
108
- def step_def(*args)
109
- Rake::Pipeline::Step.step_def(*args)
110
- end
111
-
112
- def infile(t, &block)
113
- File.open(t.prerequisites.first) do |f|
114
- block.call(f)
115
- end
116
- end
117
-
118
- def outfile(t, &block)
119
- File.open(t.name, 'w') do |f|
120
- block.call(f)
121
- end
122
- end
123
-
124
- def load_input(t)
125
- return nil if t.prerequisites.first.nil?
126
- infile(t){|f|
127
- if is_binary?(f)
128
- Marshal.load(f)
129
- else
130
- f.read
131
- end
132
- }
133
- end
134
-
135
- def save_output(t, output)
136
- case
137
- when output.nil?
138
- nil
139
- when String === output
140
- outfile(t){|f| f.write output }
141
- else
142
- outfile(t){|f| f.write Marshal.dump(output) }
143
- end
144
-
145
- end
146
-
147
- # We cannot load the input variable before the block.call, so we need another method
148
-
149
- # Load the input data from the previous step
150
- def input
151
- load_input(@@current_task) if @@current_task
152
- end
153
-
154
- # Add values to the info file
155
- def info(values = {})
156
- info = Rake::Pipeline::Info.load_info(@@current_task)
157
- info = info.merge values
158
- Rake::Pipeline::Info.save_info(@@current_task, info)
159
- info
160
- end
161
-
162
-
163
- # Define a new step; it depends on the previously defined step by default. It
164
- # saves the output of the block so it can be loaded by the input method of
165
- # the next step
166
- def step(name, dependencies = nil, &block)
167
- rule step_def(name, dependencies) do |t|
168
-
169
- # Save the task object to be able to load the input
170
- @@current_task = t
171
-
172
- output = block.call(t)
173
-
174
- save_output(t, output)
175
- end
176
-
177
- end
178
- end
179
-
180
- if __FILE__ == $0
181
-
182
- p Rake::Pipeline::Info.info_file('work/diseases/t')
183
- end
@@ -1,87 +0,0 @@
1
- require 'parse_tree_extensions'
2
- require 'parse_tree'
3
- require 'ruby2ruby'
4
-
5
- # This class helps design DSLs in Ruby based on method_missing. Class
6
- # is initialized with a block of code or a file with the code, and it is
7
- # given a method to be invoked instead of method missing. This class
8
- # deals simply with making the method_missing alias and removing it and
9
- # executing the block or file with code.
10
- class SimpleDSL
11
-
12
- class ConfigFileMissingError < StandardError; end
13
-
14
- private
15
-
16
- def hook_method(method = nil)
17
- method ||= :DSL_action
18
- @@restore_name = ("restore_DSL_" + method.to_s).to_sym
19
- @@method_name = method.to_sym
20
-
21
- class << self
22
- @restore_stack ||= []
23
- @restore_stack << @@restore_name
24
- alias_method(@@restore_name, :method_missing)
25
- alias_method(:method_missing, @@method_name)
26
- end
27
- end
28
-
29
- def unhook_method
30
- class << self
31
- alias_method(:method_missing, @restore_stack.pop)
32
- end
33
- end
34
-
35
- public
36
-
37
- def parse(method = nil, actions = nil, &block)
38
-
39
- actions ||= block
40
-
41
- hook_method(method)
42
-
43
- # Execute
44
- if actions.is_a? Proc
45
-
46
- @config[@@method_name] = actions.to_ruby.collect[1..-2].join
47
-
48
- instance_eval &actions
49
- elsif File.exists?(actions)
50
-
51
- @config[@@method_name] = File.open(actions).read
52
-
53
- eval File.open(actions).read
54
- end
55
-
56
- unhook_method
57
-
58
- end
59
-
60
-
61
- # Processes a DSL. +method+ is the name of the method executed instead
62
- # of method_missing. The code to be evaluated as a DSL is either
63
- # specified in +&block+ or in the file pointed by +file+.
64
- def initialize(method = nil, file = nil, &block)
65
- @config = {}
66
- if file
67
- raise ConfigFileMissingError.new "File '#{ file }' is missing. Have you installed the config files? (use rbbt_config)." unless File.exists? file
68
- parse(method, file)
69
- end
70
-
71
- if block
72
- parse(method, block)
73
- end
74
- end
75
-
76
- # Returns the code with the DSL that was executed. If it came from a
77
- # block it was turned to string using ruby2ruby.
78
- def config(action = nil)
79
- if action
80
- @config[action.to_sym]
81
- else
82
- @config[:DSL_action]
83
- end
84
- end
85
- end
86
-
87
-
@@ -1,19 +0,0 @@
1
- require 'fileutils'
2
- require 'rbbt'
3
-
4
-
5
- module TmpFile
6
-
7
- # Creates a random file name, with the given prefix and a random number
8
- # up to +max+
9
- def self.random_name( s="",max=10000000)
10
- n = rand(max)
11
- s << n.to_s
12
- s
13
- end
14
-
15
- # Creates a random filename in the temporary directory
16
- def self.tmp_file(s = "",max=10000000)
17
- File.join(Rbbt.tmpdir,random_name(s,max))
18
- end
19
- end
@@ -1,124 +0,0 @@
1
- require 'rbbt'
2
-
3
- $datadir = Rbbt.datadir
4
- $scriptdir = File.join(Rbbt.rootdir, '/install_scripts')
5
-
6
-
7
- task 'abner' do
8
- directory = "#{$datadir}/third_party/abner/"
9
- if !File.exists?(File.join(directory, 'abner.jar')) || $force
10
- FileUtils.mkdir_p directory
11
- `cd #{directory};rm -Rf *; #{$scriptdir}/get_abner.sh;cd -`
12
- end
13
- end
14
-
15
- task 'banner' do
16
- directory = "#{$datadir}/third_party/banner/"
17
- if !File.exists?(File.join(directory, 'banner.jar')) || $force
18
- FileUtils.mkdir_p directory
19
- `cd #{directory};rm -Rf *; #{$scriptdir}/get_banner.sh;cd -`
20
- end
21
- end
22
-
23
- task 'crf++' do
24
- directory = "#{$datadir}/third_party/crf++/"
25
- if !File.exists?(File.join(directory, 'ruby/CRFPP.so')) || $force
26
- FileUtils.mkdir_p directory
27
- `cd #{directory};rm -Rf *; #{$scriptdir}/get_crf++.sh;cd -`
28
- end
29
- end
30
-
31
-
32
-
33
- task 'wordlists' do
34
- FileUtils.cp_r File.join($scriptdir, 'wordlists/'), $datadir
35
- end
36
-
37
- task 'polysearch' do
38
- directory = "#{$datadir}/dbs/polysearch/"
39
- if !File.exists?(File.join(directory,'disease.txt')) || $force
40
- FileUtils.mkdir_p directory
41
- `cd #{directory}/; rm * -Rf; #{$scriptdir}/get_polysearch.sh;cd -`
42
- end
43
- end
44
-
45
-
46
- task '3party' => %w(abner banner crf++)
47
-
48
- task 'entrez' do
49
- directory = "#{$datadir}/dbs/entrez/"
50
- if !File.exists?(File.join(directory,'gene_info')) || $force
51
- FileUtils.mkdir_p directory
52
- `cd #{directory}/; rm * -Rf; #{$scriptdir}/get_entrez.sh;cd -`
53
- end
54
- end
55
-
56
- task 'go' do
57
- directory = "#{$datadir}/dbs/go/"
58
- if !File.exists?(File.join(directory,'gene_ontology.obo')) || $force
59
- FileUtils.mkdir_p directory
60
- `cd #{directory}/; rm * -Rf; #{$scriptdir}/get_go.sh;cd -`
61
- end
62
- end
63
-
64
- task 'biocreative' do
65
- directory = "#{$datadir}/biocreative/"
66
- if !File.exists?(File.join(directory, 'BC2GN')) || $force
67
- FileUtils.mkdir_p directory
68
- `cd #{directory};rm -Rf *; #{$scriptdir}/get_biocreative.sh;cd -`
69
- end
70
- end
71
-
72
-
73
- task 'datasets' => %w(entrez biocreative)
74
-
75
- task 'organisms' do
76
- directory = "#{$datadir}/organisms"
77
- FileUtils.mkdir_p directory
78
- %w(Rakefile rake-include.rb).each{|f|
79
- FileUtils.cp_r File.join($scriptdir, "organisms/#{ f }"), directory
80
- }
81
- Dir.glob(File.join($scriptdir, "organisms/*.Rakefile")).each{|f|
82
- org = File.basename(f).sub(/.Rakefile/,'')
83
- if !File.exists?(File.join(directory, org))
84
- FileUtils.mkdir_p File.join(directory, org)
85
- end
86
- FileUtils.cp f , File.join(directory, "#{ org }/Rakefile")
87
- }
88
- `cd #{directory}; rake names`
89
- end
90
-
91
- task 'ner' do
92
- directory = "#{$datadir}/ner"
93
- FileUtils.mkdir_p directory
94
- %w(Rakefile config).each{|f|
95
- FileUtils.cp_r File.join($scriptdir, "ner/#{ f }"), directory
96
- }
97
-
98
- %w(data model results).each{|d|
99
- FileUtils.mkdir_p File.join(directory, d)
100
- }
101
- end
102
-
103
- task 'norm' do
104
- directory = "#{$datadir}/norm"
105
- FileUtils.mkdir_p directory
106
- %w(Rakefile config functions.sh).each{|f|
107
- FileUtils.cp_r File.join($scriptdir, "norm/#{ f }"), directory
108
- }
109
- %w(results models).each{|d|
110
- FileUtils.mkdir_p File.join(directory, d)
111
- }
112
- end
113
-
114
- task 'classifier' do
115
- directory = "#{$datadir}/classifier"
116
- FileUtils.mkdir_p directory
117
- %w(Rakefile R).each{|f|
118
- FileUtils.cp_r File.join($scriptdir, "classifier/#{ f }"), directory
119
- }
120
- %w(data model results).each{|d|
121
- FileUtils.mkdir_p File.join(directory, d)
122
- }
123
- end
124
-