rbbt 1.1.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +72 -136
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -246
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -145
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -79
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Rakefile +0 -43
  22. data/install_scripts/organisms/cgd.Rakefile +0 -84
  23. data/install_scripts/organisms/human.Rakefile +0 -145
  24. data/install_scripts/organisms/mgi.Rakefile +0 -77
  25. data/install_scripts/organisms/pombe.Rakefile +0 -40
  26. data/install_scripts/organisms/rake-include.rb +0 -258
  27. data/install_scripts/organisms/rgd.Rakefile +0 -88
  28. data/install_scripts/organisms/sgd.Rakefile +0 -66
  29. data/install_scripts/organisms/tair.Rakefile +0 -54
  30. data/install_scripts/organisms/worm.Rakefile +0 -109
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -86
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -40
  49. data/lib/rbbt/sources/organism.rb +0 -245
  50. data/lib/rbbt/sources/polysearch.rb +0 -117
  51. data/lib/rbbt/sources/pubmed.rb +0 -111
  52. data/lib/rbbt/util/arrayHash.rb +0 -255
  53. data/lib/rbbt/util/filecache.rb +0 -72
  54. data/lib/rbbt/util/index.rb +0 -47
  55. data/lib/rbbt/util/misc.rb +0 -106
  56. data/lib/rbbt/util/open.rb +0 -235
  57. data/lib/rbbt/util/rake.rb +0 -183
  58. data/lib/rbbt/util/simpleDSL.rb +0 -87
  59. data/lib/rbbt/util/tmpfile.rb +0 -19
  60. data/tasks/install.rake +0 -124
@@ -1,183 +0,0 @@
1
require 'fileutils'
require 'rake'
require 'yaml'
2
-
3
# Pipeline helpers for Rakefiles: provides the +step+ and +step_def+ methods.
# By default each step depends on the step defined immediately before it. The
# value returned by a step's block is saved to the step's file: Strings are
# written as plain text, every other non-nil object is marshaled. Inside a
# step, the +input+ method returns the output of the previous step, if
# available.
#
# Example::
#
#   step :text do
#     "Text to revert"
#   end
#
#   step :revert do
#     text = input
#     text.reverse
#   end
#
module Rake::Pipeline

  # Task currently being run by +step+; read by +input+ and +info+.
  # Initialised here so that reading it before any step has run yields nil
  # instead of raising NameError (uninitialized class variable).
  @@current_task = nil

  module Rake::Pipeline::Step

    # Maps each step's rule regexp to its "<step>: <description>" text.
    @@step_descriptions = {}

    # Name given to the most recent step_def call; the default dependency of
    # the next step.
    @@last_step = nil

    class << self

      # Returns the Hash of rule regexp => description collected so far.
      def step_descriptions
        @@step_descriptions
      end

      # Records +message+ as the description of +step+, keyed by the rule
      # regexp +re+.
      def add_description(re, step, message)
        @@step_descriptions[re] = "#{ step }: #{ message }"
      end

      # Builds the argument that +rule+ needs to define the step +name+.
      #
      # name::         step name; target files live in a directory segment
      #                with this name ("<name>/<file>").
      # dependencies:: nil (depend on the previously defined step, if any), a
      #                String/Symbol step name, an Array of step names, or a
      #                Proc mapping the target file name to its sources.
      #
      # Returns the rule regexp alone when the step has no dependencies,
      # otherwise a Hash { regexp => source-mapping proc }. Returns nil for
      # any other kind of +dependencies+.
      def step_def(name, dependencies = nil)
        # Escape the name so regexp metacharacters in it match literally.
        segment = Regexp.escape(name.to_s)
        re = /(?:^|\/)#{segment}\/.*$/

        # Take the last_description and associate it with the name
        description = Rake.application.last_description
        add_description(re, name, description) if description

        dependencies = @@last_step if dependencies.nil?
        @@last_step = name

        # Swap the step's own directory segment for the dependency's. Only a
        # whole path segment is replaced, so a step name that also occurs
        # inside another path component (e.g. "text" in "context/") is left
        # alone.
        step_dir = /(^|\/)#{segment}\//
        swap = lambda { |filename, dep|
          filename.sub(step_dir) { "#{Regexp.last_match(1)}#{dep}/" }
        }

        # Generate the Hash definition
        case dependencies
        when nil
          re
        when String, Symbol
          { re => lambda { |filename| swap.call(filename, dependencies) } }
        when Array
          { re => lambda { |filename| dependencies.collect { |dep| swap.call(filename, dep) } } }
        when Proc
          { re => dependencies }
        end
      end

    end
  end

  module Rake::Pipeline::Info

    # Returns the path of the YAML info file for the target +filename+: the
    # directory that contains the file is replaced by ".info" and ".yaml" is
    # appended, e.g. "work/diseases/t" => "work/.info/t.yaml".
    def self.info_file(filename)
      filename.sub(/^(.*?)(?:[^\/]*)\/([^\/]*)$/, '\1.info/\2.yaml')
    end

    # Loads the info Hash stored for task +t+ (anything responding to
    # +name+). Returns an empty Hash when there is no info file or it is
    # empty.
    def self.load_info(t)
      info_filename = info_file(t.name)
      return {} unless File.exist?(info_filename)

      content = File.read(info_filename)
      # The file is written by save_info and may hold Symbol keys, which the
      # safe loader behind YAML.load in Psych 4 rejects.
      # NOTE(review): this deserializes arbitrary YAML; only use on info
      # files produced by this pipeline.
      loaded = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(content) : YAML.load(content)
      loaded || {}
    end

    # Writes +info+ as YAML to the info file of task +t+, creating the
    # ".info" directory when needed.
    def self.save_info(t, info = {})
      info_filename = info_file(t.name)

      FileUtils.mkdir_p File.dirname(info_filename)
      File.open(info_filename, 'w') { |file| file.write YAML.dump(info) }
    end

  end

  NON_ASCII_PRINTABLE = /[^\x20-\x7e\s]/

  # Tells whether +file+ starts with a Marshal header (format version bytes
  # readable by this Ruby). Rewinds +file+ before returning.
  def self.marshaled?(file)
    header = file.read(2)
    file.rewind
    return false if header.nil? || header.size < 2

    major, minor = header.unpack('C2')
    major == Marshal::MAJOR_VERSION && minor <= Marshal::MINOR_VERSION
  end

  # Returns a truthy value (the match position) when the first 1024 bytes of
  # +file+ contain a byte that is neither printable ASCII nor whitespace.
  # Rewinds +file+ before returning.
  def is_binary?(file)
    binary = file.read(1024) =~ NON_ASCII_PRINTABLE
    file.rewind
    binary
  end

  # Descriptions registered for the steps defined so far.
  def step_descriptions
    Rake::Pipeline::Step.step_descriptions
  end

  # Shortcut for Rake::Pipeline::Step.step_def.
  def step_def(*args)
    Rake::Pipeline::Step.step_def(*args)
  end

  # Opens the first prerequisite of task +t+ for reading and yields the file.
  def infile(t, &block)
    File.open(t.prerequisites.first) do |f|
      block.call(f)
    end
  end

  # Opens the target file of task +t+ for writing and yields the file.
  def outfile(t, &block)
    File.open(t.name, 'w') do |f|
      block.call(f)
    end
  end

  # Loads the output of the first prerequisite of +t+: unmarshals it when the
  # file holds marshaled data, otherwise returns its text. Returns nil when
  # +t+ has no prerequisites.
  def load_input(t)
    return nil if t.prerequisites.first.nil?

    infile(t) do |f|
      # Non-ASCII text (e.g. accented characters) also looks "binary", so
      # the Marshal header is checked as well before unmarshaling. A text
      # output that itself starts with the two header bytes would still be
      # misread.
      if is_binary?(f) && Rake::Pipeline.marshaled?(f)
        f.binmode # avoid newline translation corrupting the dump
        Marshal.load(f)
      else
        f.read
      end
    end
  end

  # Saves +output+ into the target file of +t+: nothing for nil, plain text
  # for Strings, a Marshal dump (written in binary mode) for anything else.
  def save_output(t, output)
    case output
    when nil
      nil
    when String
      outfile(t) { |f| f.write output }
    else
      outfile(t) { |f| f.binmode; f.write Marshal.dump(output) }
    end
  end

  # We cannot load the input variable before the block.call, so we need another method

  # Load the input data from the previous step. Returns nil when no step is
  # running.
  def input
    load_input(@@current_task) if @@current_task
  end

  # Merges +values+ into the info file of the current step and returns the
  # merged Hash. Reads the name of the current task, so it is meant to be
  # called from inside a step block.
  def info(values = {})
    info = Rake::Pipeline::Info.load_info(@@current_task)
    info = info.merge values
    Rake::Pipeline::Info.save_info(@@current_task, info)
    info
  end

  # Define a new step; by default it depends on the previously defined one.
  # The value returned by the block is saved so that it can be loaded by the
  # input method of the next step.
  def step(name, dependencies = nil, &block)
    rule step_def(name, dependencies) do |t|
      # Save the task object to be able to load the input
      @@current_task = t

      output = block.call(t)

      save_output(t, output)
    end
  end
end
179
-
180
# When this file is executed directly, print the info file path computed for
# a sample target.
p Rake::Pipeline::Info.info_file('work/diseases/t') if __FILE__ == $0
@@ -1,87 +0,0 @@
1
- require 'parse_tree_extensions'
2
- require 'parse_tree'
3
- require 'ruby2ruby'
4
-
5
# This class helps design DSLs in Ruby based on method_missing. An instance
# is initialized with a block of code or with the path of a file containing
# the code, and is given the name of a method to be invoked instead of
# method_missing. The class only takes care of aliasing method_missing to
# that method, evaluating the block or file, and restoring method_missing
# afterwards.
class SimpleDSL

  class ConfigFileMissingError < StandardError; end

  private

  # Singleton class of this instance; the aliases are installed there so
  # that hooking one DSL object does not affect any other.
  def dsl_singleton
    class << self; self; end
  end

  # Aliases method_missing to +method+ (default :DSL_action) on this
  # instance, keeping the previous method_missing under
  # restore_DSL_<method>. Returns the hooked method name as a Symbol.
  def hook_method(method = nil)
    method_name  = (method || :DSL_action).to_sym
    restore_name = ("restore_DSL_" + method_name.to_s).to_sym

    dsl_singleton.send(:alias_method, restore_name, :method_missing)
    dsl_singleton.send(:alias_method, :method_missing, method_name)

    # Only remember the hook once both aliases are in place.
    (@restore_stack ||= []) << restore_name
    method_name
  end

  # Restores the method_missing saved by the most recent hook_method call.
  # Does nothing when no hook is active.
  def unhook_method
    restore_name = @restore_stack && @restore_stack.pop
    dsl_singleton.send(:alias_method, :method_missing, restore_name) if restore_name
  end

  public

  # Evaluates DSL code with method_missing redirected to +method+.
  #
  # method::  name of the method that receives the DSL calls
  #           (default :DSL_action).
  # actions:: a Proc, or the path of a file with the code; defaults to
  #           +block+. Nothing is evaluated when it is nil or names a file
  #           that does not exist.
  #
  # The source of the evaluated code is stored and can be retrieved with
  # +config+. method_missing is restored even when the DSL code raises.
  def parse(method = nil, actions = nil, &block)
    actions ||= block

    # No extra locals are introduced in this method: the file branch
    # evaluates code in this binding, where a local would shadow a DSL word
    # of the same name.
    method = hook_method(method)

    begin
      if actions.is_a? Proc
        # Proc#to_ruby comes from parse_tree_extensions/ruby2ruby; the first
        # and last lines (the proc wrapper) are dropped. Without that
        # extension the block is still run but no source is recorded.
        @config[method] = actions.respond_to?(:to_ruby) ? actions.to_ruby.lines.to_a[1..-2].join : nil

        instance_eval(&actions)
      elsif actions && File.exist?(actions)
        @config[method] = File.read(actions)

        # NOTE(review): evaluates the file contents as Ruby; only use with
        # trusted configuration files.
        eval @config[method]
      end
    ensure
      unhook_method
    end
  end

  # Processes a DSL. +method+ is the name of the method executed instead
  # of method_missing. The code to be evaluated as a DSL is either
  # specified in +&block+ or in the file pointed by +file+.
  #
  # Raises ConfigFileMissingError when +file+ is given but does not exist.
  def initialize(method = nil, file = nil, &block)
    @config = {}
    if file
      unless File.exist? file
        raise ConfigFileMissingError, "File '#{ file }' is missing. Have you installed the config files? (use rbbt_config)."
      end
      parse(method, file)
    end

    parse(method, block) if block
  end

  # Returns the code of the DSL that was executed for +action+ (default
  # :DSL_action). If it came from a block it was turned into a string using
  # ruby2ruby.
  def config(action = nil)
    if action
      @config[action.to_sym]
    else
      @config[:DSL_action]
    end
  end
end
86
-
87
-
@@ -1,19 +0,0 @@
1
- require 'fileutils'
2
- require 'rbbt'
3
-
4
-
5
module TmpFile

  # Creates a random file name: the prefix +s+ followed by a random integer
  # below +max+. The string passed as +s+ is not modified, so frozen strings
  # and strings reused by the caller are safe.
  def self.random_name(s = "", max = 10000000)
    "#{s}#{rand(max)}"
  end

  # Creates a random filename in the temporary directory (Rbbt.tmpdir)
  def self.tmp_file(s = "", max = 10000000)
    File.join(Rbbt.tmpdir, random_name(s, max))
  end
end
@@ -1,124 +0,0 @@
1
- require 'rbbt'
2
-
3
# Rake tasks that install third-party tools, datasets and working
# directories under the Rbbt data directory. Setting $force re-runs the
# download tasks even when their marker file is already present.
$datadir = Rbbt.datadir
$scriptdir = File.join(Rbbt.rootdir, '/install_scripts')

# Runs the install script +script+ (looked up in $scriptdir) inside
# +directory+, after emptying it, unless +marker+ already exists there and
# $force is not set.
#
# The directory is emptied from Ruby after changing into it, so a directory
# that cannot be entered raises instead of deleting files somewhere else,
# and no path is interpolated into a shell command line. The script's output
# goes to the terminal; a failing script produces a warning rather than an
# exception.
def rbbt_run_install_script(directory, marker, script)
  return if File.exist?(File.join(directory, marker)) && !$force

  FileUtils.mkdir_p directory
  script_path = File.join($scriptdir, script)
  Dir.chdir(directory) do
    # Same scope as "rm -Rf *": dot files are left in place.
    FileUtils.rm_rf Dir.glob('*')
    # The [path, argv0] form bypasses the shell, so paths with spaces work.
    warn "rbbt install: #{script_path} failed" unless system([script_path, script_path])
  end
end

# Copies +files+ from $scriptdir/<name> into $datadir/<name> and creates the
# given +subdirs+ there.
def rbbt_install_workdir(name, files, subdirs)
  directory = "#{$datadir}/#{name}"
  FileUtils.mkdir_p directory
  files.each { |f| FileUtils.cp_r File.join($scriptdir, "#{name}/#{ f }"), directory }
  subdirs.each { |d| FileUtils.mkdir_p File.join(directory, d) }
end

task 'abner' do
  rbbt_run_install_script "#{$datadir}/third_party/abner/", 'abner.jar', 'get_abner.sh'
end

task 'banner' do
  rbbt_run_install_script "#{$datadir}/third_party/banner/", 'banner.jar', 'get_banner.sh'
end

task 'crf++' do
  rbbt_run_install_script "#{$datadir}/third_party/crf++/", 'ruby/CRFPP.so', 'get_crf++.sh'
end

task 'wordlists' do
  FileUtils.cp_r File.join($scriptdir, 'wordlists/'), $datadir
end

task 'polysearch' do
  rbbt_run_install_script "#{$datadir}/dbs/polysearch/", 'disease.txt', 'get_polysearch.sh'
end

task '3party' => %w(abner banner crf++)

task 'entrez' do
  rbbt_run_install_script "#{$datadir}/dbs/entrez/", 'gene_info', 'get_entrez.sh'
end

task 'go' do
  rbbt_run_install_script "#{$datadir}/dbs/go/", 'gene_ontology.obo', 'get_go.sh'
end

task 'biocreative' do
  rbbt_run_install_script "#{$datadir}/biocreative/", 'BC2GN', 'get_biocreative.sh'
end

task 'datasets' => %w(entrez biocreative)

task 'organisms' do
  directory = "#{$datadir}/organisms"
  FileUtils.mkdir_p directory
  %w(Rakefile rake-include.rb).each do |f|
    FileUtils.cp_r File.join($scriptdir, "organisms/#{ f }"), directory
  end

  # Each <organism>.Rakefile becomes <organism>/Rakefile in the data dir.
  Dir.glob(File.join($scriptdir, "organisms/*.Rakefile")).each do |f|
    org = File.basename(f, '.Rakefile')
    FileUtils.mkdir_p File.join(directory, org)
    FileUtils.cp f, File.join(directory, "#{ org }/Rakefile")
  end

  Dir.chdir(directory) do
    warn "rbbt install: 'rake names' failed in #{directory}" unless system('rake', 'names')
  end
end

task 'ner' do
  rbbt_install_workdir 'ner', %w(Rakefile config), %w(data model results)
end

task 'norm' do
  rbbt_install_workdir 'norm', %w(Rakefile config functions.sh), %w(results models)
end

task 'classifier' do
  rbbt_install_workdir 'classifier', %w(Rakefile R), %w(data model results)
end
124
-