rbbt 1.1.7 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +72 -136
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -246
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -145
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -79
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Rakefile +0 -43
  22. data/install_scripts/organisms/cgd.Rakefile +0 -84
  23. data/install_scripts/organisms/human.Rakefile +0 -145
  24. data/install_scripts/organisms/mgi.Rakefile +0 -77
  25. data/install_scripts/organisms/pombe.Rakefile +0 -40
  26. data/install_scripts/organisms/rake-include.rb +0 -258
  27. data/install_scripts/organisms/rgd.Rakefile +0 -88
  28. data/install_scripts/organisms/sgd.Rakefile +0 -66
  29. data/install_scripts/organisms/tair.Rakefile +0 -54
  30. data/install_scripts/organisms/worm.Rakefile +0 -109
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -86
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -40
  49. data/lib/rbbt/sources/organism.rb +0 -245
  50. data/lib/rbbt/sources/polysearch.rb +0 -117
  51. data/lib/rbbt/sources/pubmed.rb +0 -111
  52. data/lib/rbbt/util/arrayHash.rb +0 -255
  53. data/lib/rbbt/util/filecache.rb +0 -72
  54. data/lib/rbbt/util/index.rb +0 -47
  55. data/lib/rbbt/util/misc.rb +0 -106
  56. data/lib/rbbt/util/open.rb +0 -235
  57. data/lib/rbbt/util/rake.rb +0 -183
  58. data/lib/rbbt/util/simpleDSL.rb +0 -87
  59. data/lib/rbbt/util/tmpfile.rb +0 -19
  60. data/tasks/install.rake +0 -124
@@ -1,105 +0,0 @@
1
- require 'rbbt/util/open'
2
- require 'rbbt'
3
-
4
# This module interacts with BioMart. It performs queries to BioMart and
# synthesises a hash with the results. Note that this module connects to the
# online BioMart WS using the Open module from 'rbbt/util/open', which offers
# caching by default. To obtain up-to-date results you may need to clear the
# cache from previous queries.
module BioMart

  # Raised when the BioMart response contains the "Query ERROR:" marker.
  class QueryError < StandardError; end

  # XML template for a query. The <!--...--> placeholders are filled in by
  # BioMart.get.
  @@biomart_query_xml = <<-EOT
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Query>
<Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >
<Dataset name = "<!--DATABASE-->" interface = "default" >
<!--FILTERS-->
<!--MAIN-->
<!--ATTRIBUTES-->
</Dataset>
</Query>
  EOT

  # Performs a single request to the BioMart WS and merges the response into
  # +data+, which is returned. It is the building block used by BioMart.query;
  # it stays callable from outside the module because a bare +private+ does
  # not apply to methods defined with <tt>def self.</tt>.
  #
  # * +database+: name of the dataset to query.
  # * +main+: attribute whose values become the keys of the result.
  # * +attrs+: additional attributes requested in this call (default: none).
  # * +filters+: filter names; defaults to <tt>["with_#{main}"]</tt>.
  # * +data+: hash to extend; a new one is created when nil.
  #
  # Raises BioMart::QueryError if the response contains "Query ERROR:".
  def self.get(database, main, attrs = nil, filters = nil, data = nil)
    attrs   ||= []
    filters ||= ["with_#{main}"]
    data    ||= {}

    filter_xml    = filters.collect { |name| "<Filter name = \"#{name}\" excluded = \"0\"/>" }.join("\n")
    attribute_xml = attrs.collect { |name| "<Attribute name = \"#{name}\"/>" }.join("\n")

    # The block form of sub inserts the replacement literally; with a string
    # replacement, backslash sequences in the names would be interpreted.
    query = @@biomart_query_xml.sub(/<!--DATABASE-->/) { database }
    query = query.sub(/<!--FILTERS-->/) { filter_xml }
    query = query.sub(/<!--MAIN-->/) { "<Attribute name = \"#{main}\" />" }
    query = query.sub(/<!--ATTRIBUTES-->/) { attribute_xml }

    response = Open.read('http://www.biomart.org/biomart/martservice?query=' + query.gsub(/\n/, ' '))
    raise BioMart::QueryError, response if response =~ /Query ERROR:/

    response.each_line do |line|
      parts = line.chomp.split(/\t/)
      key   = parts.shift
      next if key.nil? || key.empty?

      data[key] ||= {}
      attrs.each do |name|
        value = parts.shift
        # Every requested attribute gets an entry, even when the row has no
        # value for it.
        data[key][name] ||= []
        data[key][name] << value unless value.nil?
      end
    end

    data
  end

  # This method performs a query in BioMart for a dataset and a given set of
  # attributes. There must be a main attribute that will be used as the key in
  # the result hash; optionally there may be a list of additional attributes
  # and filters. The data parameter at the end is used internally to
  # incrementally build the result, due to a limitation of the BioMart WS
  # that only allows 3 external arguments (the main attribute plus two others
  # per request); users normally should leave it unspecified or nil. The
  # result is a hash, where the keys are the different values for the main
  # attribute, and the value is a hash with every other attribute as key, and
  # as value an array with all possible values (note that for a given value
  # of the main attribute, there may be more than one value for another
  # attribute). If filters is left as nil it adds a filter to the BioMart
  # query to remove results with the main attribute empty; this may cause an
  # error if the BioMart WS does not allow filtering with that attribute.
  def self.query(database, main, attrs = nil, filters = nil, data = nil)
    attrs ||= []
    data  ||= {}

    chunks = attrs.each_slice(2).to_a
    # With no additional attributes a request is still needed to collect the
    # values of the main attribute.
    chunks << [] if chunks.empty?

    chunks.each { |chunk| data = get(database, main, chunk, filters, data) }

    data
  end

end
105
-
@@ -1,211 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
3
- require 'rbbt/util/tmpfile'
4
- require 'rbbt/util/filecache'
5
- require 'rbbt/bow/bow.rb'
6
- require 'set'
7
-
8
-
9
# This module is used to parse and extract information from the
# gene_info file at Entrez Gene, as well as from the gene2pubmed file.
# Both need to be downloaded and accessible for Rbbt, which is done as
# part of a normal installation.
module Entrez

  # Raised when a required Entrez data file is not found under Rbbt.datadir.
  class NoFileError < StandardError; end

  # Builds a regular expression matching lines whose first column is one of
  # the given taxonomy ids (a single id or an array; ids are compared as
  # literal strings) followed by whitespace.
  def self.taxonomy_line_regexp(taxs)
    taxs = [taxs] unless taxs.is_a?(Array)
    ids  = Regexp.union(taxs.collect { |t| t.to_s })
    /\A(?:#{ids.source})[[:space:]]/
  end
  private_class_method :taxonomy_line_regexp

  # Given a taxonomy, or set of taxonomies, it returns an inverse hash,
  # where each key is the entrez id of a gene, and the value is an array
  # of possible synonyms in other databases. It is mostly used to translate
  # entrez ids to the native database id of the organism. The parameter
  # +native+ specifies the position of the column containing the synonyms
  # (5 by default, counting from 0); +fix+ and +check+ are Procs used, if
  # present, to pre-process each synonym and to decide whether it should be
  # kept.
  #
  # Raises NoFileError if the gene_info file is not installed.
  def self.entrez2native(taxs, native = nil, fix = nil, check = nil)
    gene_info = File.join(Rbbt.datadir, 'dbs/entrez/gene_info')
    raise NoFileError, "Install the Entrez gene_info file" unless File.exist? gene_info

    native ||= 5
    tax_line = taxonomy_line_regexp(taxs)

    lexicon = {}
    # Lines are filtered in Ruby rather than through a shell pipeline, so no
    # value is ever interpolated into a command line and no temporary file is
    # needed.
    File.foreach(gene_info) do |line|
      next unless line =~ tax_line

      parts    = line.chomp.split(/\t/)
      synonyms = parts[native]
      # Short lines have no such column; '-' marks an empty one.
      next if synonyms.nil? || synonyms == '-'

      entrez = parts[1]
      synonyms.split(/\|/).each do |id|
        id = fix.call(id) if fix
        next if check && !check.call(id)

        lexicon[entrez] ||= []
        lexicon[entrez] << id
      end
    end

    lexicon
  end

  # For a given taxonomy, or set of taxonomies, it returns a hash with
  # genes as keys and arrays of related PubMed ids as values, as
  # extracted from the gene2pubmed file from Entrez Gene.
  #
  # Raises NoFileError if the gene2pubmed file is not installed.
  def self.entrez2pubmed(taxs)
    gene2pubmed = File.join(Rbbt.datadir, 'dbs/entrez/gene2pubmed')
    raise NoFileError, "Install the Entrez gene2pubmed file" unless File.exist? gene2pubmed

    tax_line = taxonomy_line_regexp(taxs)
    tmp = TmpFile.tmp_file("entrez-")

    begin
      # Open.to_hash is given a file, so the selected lines are copied to a
      # temporary one.
      File.open(tmp, 'w') do |out|
        File.foreach(gene2pubmed) { |line| out << line if line =~ tax_line }
      end

      Open.to_hash(tmp, :native => 1, :extra => 2).each { |code, value_lists| value_lists.flatten! }
    ensure
      # Remove the temporary file even if parsing fails.
      File.delete(tmp) if File.exist?(tmp)
    end
  end

  # This class parses an xml containing the information for a particular
  # gene as served by Entrez Gene, and holds some of its information. Each
  # attribute is the result of String#scan with one capture group (an array
  # of one-element arrays), or nil when the object was built from a nil xml.
  class Gene
    attr_reader :organism, :symbol, :description, :aka, :protnames, :summary, :comentaries

    def initialize(xml)
      return if xml.nil?

      # No regexp flags: in Ruby /s selects the Windows-31J encoding (it is
      # not Perl's "dot matches newline") and makes matching fail on non-ASCII
      # UTF-8 text. Matches are therefore confined to a single line, and the
      # lazy quantifier keeps two elements on one line apart.
      @organism    = xml.scan(/<Org-ref_taxname>(.*?)<\/Org-ref_taxname>/)
      @symbol      = xml.scan(/<Gene-ref_locus>(.*?)<\/Gene-ref_locus>/)
      @description = xml.scan(/<Gene-ref_desc>(.*?)<\/Gene-ref_desc>/)
      @aka         = xml.scan(/<Gene-ref_syn_E>(.*?)<\/Gene-ref_syn_E>/)
      @protnames   = xml.scan(/<Prot-ref_name_E>(.*?)<\/Prot-ref_name_E>/)
      @summary     = xml.scan(/<Entrezgene_summary>(.*?)<\/Entrezgene_summary>/)
      @comentaries = xml.scan(/<Gene-commentary_text>(.*?)<\/Gene-commentary_text>/)
    end

    # Joins the text from symbol, description, aka, protnames, and
    # summary
    def text
      [@symbol, @description, @aka, @protnames, @summary].flatten.join(". ")
    end
  end

  # Time of the last online request and the minimum number of seconds to
  # leave between two consecutive requests.
  @@last = Time.now
  @@entrez_lag = 1

  # Downloads the xml for one gene id, or for an array of them, from the
  # Entrez efetch service, bypassing the Open cache. Returns the xml of the
  # first gene found for a single id, or a hash from gene id to xml for an
  # array. Intended for internal use by get_gene.
  def self.get_online(geneids)
    geneids_list = ( geneids.is_a?(Array) ? geneids.join(',') : geneids.to_s )
    url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{geneids_list}"

    # Wait so that consecutive requests are at least @@entrez_lag seconds
    # apart.
    diff = Time.now - @@last
    sleep @@entrez_lag - diff unless diff > @@entrez_lag

    xml = Open.read(url, :quiet => true, :nocache => true)

    @@last = Time.now

    # /m makes the dot match newlines, as each record spans many lines.
    genes = xml.scan(/(<Entrezgene>.*?<\/Entrezgene>)/m).flatten

    return genes.first unless geneids.is_a? Array

    # NOTE(review): records are paired with the requested ids by position,
    # which assumes the service returns exactly one record per id and in the
    # same order -- confirm, or key on <Gene-track_geneid> instead.
    list = {}
    genes.each_with_index { |gene, i| list[geneids[i]] = gene }
    list
  end

  # Build a file name for a gene based on the id. Prefix the id by 'gene-'
  # and add a '.xml' extension; the result is passed through
  # FileCache.clean_path.
  def self.gene_filename(id)
    FileCache.clean_path('gene-' + id.to_s + '.xml')
  end

  # Returns a Gene object for the given Entrez Gene id. If an array of
  # ids is given instead, a hash is returned. This method uses the
  # caching facilities from Rbbt: only genes that are not in the FileCache
  # are downloaded, and downloaded genes are added to it.
  def self.get_gene(geneid)
    return nil if geneid.nil?

    if Array === geneid
      missing = []
      list = {}

      geneid.each do |id|
        next if id.nil?
        path = FileCache.path(gene_filename(id))
        if File.exist? path
          list[id] = Gene.new(Open.read(path))
        else
          missing << id
        end
      end

      return list unless missing.any?

      get_online(missing).each do |id, xml|
        filename = gene_filename id
        FileCache.add_file(filename, xml) unless File.exist? FileCache.path(filename)
        list[id] = Gene.new(xml)
      end

      list
    else
      filename = gene_filename geneid
      path = FileCache.path(filename)

      return Gene.new(Open.read(path)) if File.exist? path

      xml = get_online(geneid)
      FileCache.add_file(filename, xml)
      Gene.new(xml)
    end
  end

  # Measures the overlap between the words of a chunk of text and those of
  # the text found in Entrez Gene for that particular gene: the number of
  # words in common divided by the sum of the sizes of both word sets. The
  # +gene+ may be a gene identifier (String or Integer) or a Gene class
  # instance; for anything else, or when either word set is empty, the
  # result is 0.
  def self.gene_text_similarity(gene, text)
    case gene
    when Entrez::Gene
      gene_text = gene.text
    when String, Integer
      # Integer rather than Fixnum, which recent Ruby versions no longer
      # define.
      gene_text = get_gene(gene).text
    else
      return 0
    end

    gene_words = gene_text.words.to_set
    text_words = text.words.to_set

    return 0 if gene_words.empty? || text_words.empty?

    common = gene_words.intersection(text_words)
    common.length / (gene_words.length + text_words.length).to_f
  end
end
@@ -1,40 +0,0 @@
1
- require 'rbbt'
2
-
3
-
4
# This module holds helper methods to deal with the Gene Ontology files. Right
# now all it does is provide a translation from ids to the actual names.
module GO
  # Hash from term id to a hash with the fields of that term; nil until
  # GO.init has been called.
  @@info = nil

  # This method needs to be called before any translations can be made; it is
  # called automatically the first time the id2name method is called. It loads
  # the gene_ontology.obo file and extracts the fields of every [Term] stanza,
  # although right now only the name field is used. When a field appears more
  # than once in a term, the last value is the one kept.
  def self.init
    @@info = {}

    # File.read closes the file once its content has been read.
    obo = File.read(File.join(Rbbt.datadir, 'dbs/go/gene_ontology.obo'))

    # Splitting on every stanza header, not just [Term], keeps stanzas of
    # other types (such as [Typedef]) from being folded into the preceding
    # term and overwriting its id and name. The capture group makes split
    # return the stanza type before each body; the first element is the file
    # header, which is not a term.
    obo.split(/^\[(\w+)\]\s*$/).drop(1).each_slice(2) do |type, body|
      next unless type == 'Term'

      term_info = {}
      body.to_s.each_line do |line|
        # The key is everything before the first colon.
        field = line.chomp.match(/(.*?):(.*)/)
        next unless field
        term_info[field[1].strip] = field[2].strip
      end

      @@info[term_info["id"]] = term_info if term_info["id"]
    end

    @@info
  end

  # Translates a term id into its name. For a single id the result is the
  # name, or "Name not found" when the id is unknown; for an array of ids it
  # is an array with the name of each one, with nil for those unknown.
  def self.id2name(id)
    self.init unless @@info

    if id.kind_of? Array
      @@info.values_at(*id).collect { |term| term['name'] if term }
    else
      return "Name not found" unless @@info[id]
      @@info[id]['name']
    end
  end

end
@@ -1,245 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
3
- require 'rbbt/util/index'
4
-
5
# This module contains some Organism centric functionalities. Each organism is
# identified by a keyword.
module Organism

  # Raised when trying to access information for an organism that has not been
  # prepared already.
  class OrganismNotProcessedError < StandardError; end

  # Return the list of all supported organisms. The prepared flag is used to
  # show only those that have been prepared, that is, those that have an
  # 'identifiers' file.
  def self.all(prepared = true)
    base = File.join(Rbbt.datadir,'/organisms/')
    if prepared
      Dir.glob(base + '/*/identifiers').collect { |f| File.basename(File.dirname(f)) }
    else
      Dir.glob(base + '/*').select { |f| File.directory? f }.collect { |f| File.basename(f) }
    end
  end

  # Return the complete name of an organism. The org parameter is the organism
  # keyword. Raises OrganismNotProcessedError if the organism has no 'name'
  # file.
  def self.name(org)
    name_file = File.join(Rbbt.datadir,"organisms/#{ org }/name")
    raise OrganismNotProcessedError, "Missing 'name' file" unless File.exist? name_file
    Open.read(name_file)
  end

  # Hash linking all the organism long names with their keywords in Rbbt. It
  # is the inverse of the name method. It is filled when the module is loaded,
  # with the names stripped and in lower case.
  NAME2ORG = {}
  Organism::all.each{|org|
    name = Organism.name(org).strip.downcase
    NAME2ORG[name] = org
  }

  # Return the keyword associated with an organism name.
  def self.name2org(name)
    NAME2ORG[name.strip.downcase]
  end

  # FIXME: The NER related stuff is harder to install, that is why we hide the
  # requires next to where they are needed, next to options

  # Return a NER object which could be of RNER, Abner or Banner class; this is
  # selected using the type parameter. For :rner the model is taken from
  # options[:model], or else the one for the organism if it exists, or else
  # the 'BC2' one. Raises a RuntimeError for an unknown type.
  def self.ner(org, type=:rner, options = {})
    case type.to_sym
    when :abner
      require 'rbbt/ner/abner'
      return Abner.new
    when :banner
      require 'rbbt/ner/banner'
      return Banner.new
    when :rner
      require 'rbbt/ner/rner'
      organism_model = File.join(Rbbt.datadir,"ner/model/#{ org }")
      model   = options[:model]
      model ||= organism_model if File.exist? organism_model
      model ||= File.join(Rbbt.datadir,'ner/model/BC2')
      return NER.new(model)
    else
      raise "Ner type (#{ type }) unknown"
    end
  end

  # Return a normalization object. If to_entrez is not given, an index from
  # the first supported id format to 'Entrez Gene ID' is used.
  def self.norm(org, to_entrez = nil)
    require 'rbbt/ner/rnorm'
    if to_entrez.nil?
      to_entrez = id_index(org, :native => 'Entrez Gene ID', :other => [supported_ids(org).first])
    end

    # The token configuration file is optional.
    token_file = File.join(Rbbt.datadir, 'norm','config',org.to_s + '.config')
    token_file = nil unless File.exist? token_file

    Normalizer.new(File.join(Rbbt.datadir,"organisms/#{ org }/lexicon"), :to_entrez => to_entrez, :file => token_file, :max_candidates => 20)
  end

  # Returns a hash with the names associated with each gene id. The ids are
  # in Rbbt native format for that organism.
  def self.lexicon(org, options = {})
    options = {:sep => "\t|\\|", :flatten => true}.merge(options)
    Open.to_hash(File.join(Rbbt.datadir,"organisms/#{ org }/lexicon"),options)
  end

  # Returns a hash with the list of go terms for each gene id. Gene ids are in
  # Rbbt native format for that organism.
  def self.goterms(org)
    goterms = {}
    Open.read(File.join(Rbbt.datadir,"organisms/#{ org }/gene.go")).each_line{|l|
      gene, go = l.chomp.split(/\t/)
      goterms[gene.strip] ||= []
      goterms[gene.strip] << go.strip
    }
    goterms
  end

  # Return list of PubMed ids associated to the organism. Determined using a
  # PubMed query with the name of the organism
  def self.literature(org)
    Open.read(File.join(Rbbt.datadir,"organisms/#{ org }/all.pmid")).scan(/\d+/)
  end

  # Return hash that associates genes to a list of PubMed ids.
  def self.gene_literature(org)
    Open.to_hash(File.join(Rbbt.datadir,"organisms/#{ org }/gene.pmid"), :flatten => true)
  end

  # Return hash that associates genes to a list of PubMed ids. Includes only
  # those found to support GO term associations.
  def self.gene_literature_go(org)
    Open.to_hash(File.join(Rbbt.datadir,"organisms/#{ org }/gene_go.pmid"), :flatten => true)
  end

  # Returns a list with the names of the id formats supported for an organism,
  # taken from the first line of the 'identifiers' file when it starts with
  # '#'; the list is empty otherwise. If examples are produced, the list is of
  # [format, example] pairs.
  #
  # *Options:*
  #
  # *examples:* Include example ids for each format
  def self.supported_ids(org, options = {})
    lines  = Open.read(File.join(Rbbt.datadir,"organisms/#{ org }/identifiers")).each_line.to_a
    header = lines.shift

    formats = []
    if header && header =~ /^\s*#/
      formats = Open.fields(header.sub(/^[\s#]+/,'')).collect { |n| n.strip }
    end

    # Deciding here, rather than while scanning the lines, also covers files
    # without a header line when no examples were requested.
    return formats unless options[:examples]

    # The examples come from the line that has the most non-empty fields,
    # using the first alternative of each field.
    examples = []
    lines.each do |l|
      fields = Open.fields(l)
      if fields.select { |name| name && name =~ /\w/ }.length > examples.length
        examples = fields.collect { |name| name.to_s.split(/\|/).first }
      end
    end

    formats.zip(examples)
  end

  # Creates a hash where each possible id (in lower case) is associated with
  # the names of the formats (it is potentially possible for different formats
  # to have the same id). This is used in the guessIdFormat method.
  def self.id_formats(org)
    id_types = {}
    formats = supported_ids(org)

    text = Open.read(File.join(Rbbt.datadir,"organisms/#{ org }/identifiers"))

    text.each_line do |l|
      # The header and any other comment line hold format names, not ids.
      next if l =~ /^\s*#/

      formats.zip(Open.fields(l)).each do |format, ids|
        (ids || "").split(/\|/).each do |id|
          next if id.nil? || id == ""
          key = id.downcase
          id_types[key] ||= []
          id_types[key] << format unless id_types[key].include? format
        end
      end
    end

    id_types
  end

  # Guesses the id format used by the ids in +query+. The +formats+ parameter
  # is either a hash like the one returned by id_formats or an organism
  # keyword, for which that hash is computed. Returns a [format, count] pair
  # for the format matching the most ids, provided it matches more than a
  # tenth of them (integer division), or nil if there is none.
  def self.guessIdFormat(formats, query)
    query = query.compact.collect { |gene| gene.downcase }.uniq
    formats = id_formats(formats) if String === formats

    return nil if formats.values.empty?
    values = formats.values_at(*query)
    return nil if values.empty?

    format_count = {}
    values.compact.collect { |types| types.uniq }.flatten.each do |f|
      format_count[f] ||= 0
      format_count[f] += 1
    end

    return nil if format_count.values.empty?
    format_count.select { |k,v| v > (query.length / 10) }.sort { |a,b| b[1] <=> a[1] }.first
  end

  # Returns the position of the format +id_name+ in the +supported_ids+ list.
  # Surrounding whitespace is ignored, and so is case unless
  # options[:case_sensitive] is set. If there are several matches the last one
  # is used.
  #
  # NOTE(review): an unknown format name yields 0, which cannot be told apart
  # from a match on the first format -- confirm callers rely on this before
  # changing it.
  def self.id_position(supported_ids, id_name, options = {})
    pos = 0
    supported_ids.each_with_index do |id, i|
      if id.strip == id_name.strip || !options[:case_sensitive] && id.strip.downcase == id_name.strip.downcase
        pos = i
      end
    end
    pos
  end

  # Builds an index over the 'identifiers' file of the organism using
  # Index.index. The options :native (name of the target format) and :other
  # (list of names of the source formats) are translated into column
  # positions, passed on as :native and :extra; when neither is given the
  # options are passed on as they are. :case_sensitive defaults to false. The
  # hash given by the caller is not modified.
  def self.id_index(org, option = {})
    option = option.dup
    native = option[:native]
    other  = option[:other]
    option[:case_sensitive] = false if option[:case_sensitive].nil?

    identifiers = File.join(Rbbt.datadir,"organisms/#{ org }/identifiers")

    return Index.index(identifiers, option) if native.nil? and other.nil?

    supported = Organism.supported_ids(org)

    first = native ? id_position(supported,native,option) : 0

    rest = if other
             other.collect { |name| id_position(supported,name, option) }
           else
             (0..supported.length - 1).to_a - [first]
           end

    option[:native] = first
    option[:extra]  = rest
    Index.index(identifiers, option)
  end

end
245
-