rbbt 1.1.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +72 -136
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -246
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -145
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -79
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Rakefile +0 -43
  22. data/install_scripts/organisms/cgd.Rakefile +0 -84
  23. data/install_scripts/organisms/human.Rakefile +0 -145
  24. data/install_scripts/organisms/mgi.Rakefile +0 -77
  25. data/install_scripts/organisms/pombe.Rakefile +0 -40
  26. data/install_scripts/organisms/rake-include.rb +0 -258
  27. data/install_scripts/organisms/rgd.Rakefile +0 -88
  28. data/install_scripts/organisms/sgd.Rakefile +0 -66
  29. data/install_scripts/organisms/tair.Rakefile +0 -54
  30. data/install_scripts/organisms/worm.Rakefile +0 -109
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -86
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -40
  49. data/lib/rbbt/sources/organism.rb +0 -245
  50. data/lib/rbbt/sources/polysearch.rb +0 -117
  51. data/lib/rbbt/sources/pubmed.rb +0 -111
  52. data/lib/rbbt/util/arrayHash.rb +0 -255
  53. data/lib/rbbt/util/filecache.rb +0 -72
  54. data/lib/rbbt/util/index.rb +0 -47
  55. data/lib/rbbt/util/misc.rb +0 -106
  56. data/lib/rbbt/util/open.rb +0 -235
  57. data/lib/rbbt/util/rake.rb +0 -183
  58. data/lib/rbbt/util/simpleDSL.rb +0 -87
  59. data/lib/rbbt/util/tmpfile.rb +0 -19
  60. data/tasks/install.rake +0 -124
@@ -1,105 +0,0 @@
1
- require 'rbbt/util/open'
2
- require 'rbbt'
3
-
4
- # This module interacts with BioMart. It performs queries to BioMart and
5
- # synthesises a hash with the results. Note that this module connects to the
6
- # online BioMart WS using the Open in 'rbbt/util/open' module which offers
7
- # caching by default. To obtain up to date results you may need to clear the
8
- # cache from previous queries.
9
- module BioMart
10
-
11
- class BioMart::QueryError < StandardError; end
12
- private
13
-
14
- @@biomart_query_xml = <<-EOT
15
- <?xml version="1.0" encoding="UTF-8"?>
16
- <!DOCTYPE Query>
17
- <Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "1" count = "" datasetConfigVersion = "0.6" >
18
- <Dataset name = "<!--DATABASE-->" interface = "default" >
19
- <!--FILTERS-->
20
- <!--MAIN-->
21
- <!--ATTRIBUTES-->
22
- </Dataset>
23
- </Query>
24
- EOT
25
-
26
-
27
-
28
-
29
# Sends a single query to the BioMart martservice and merges the
# tab-separated response into +data+.
#
# database:: dataset name substituted into the query template.
# main::     attribute used as the key of the result hash.
# attrs::    extra attribute names to request (defaults to none).
# filters::  filter names; defaults to ["with_<main>"].
# data::     hash to accumulate into (defaults to a new hash).
#
# Returns +data+, shaped as { main_value => { attr_name => [values] } }.
# Raises BioMart::QueryError when the response contains "Query ERROR:".
def self.get(database, main, attrs = nil, filters = nil, data = nil)
  attrs   ||= []
  filters ||= ["with_#{main}"]
  data    ||= {}

  filter_xml    = filters.collect{|name| "<Filter name = \"#{ name }\" excluded = \"0\"/>"}.join("\n")
  attribute_xml = attrs.collect{|name| "<Attribute name = \"#{ name }\"/>"}.join("\n")

  # Fill the placeholders of the XML template in the same order as always.
  query = @@biomart_query_xml.clone
  query.sub!(/<!--DATABASE-->/,database)
  query.sub!(/<!--FILTERS-->/, filter_xml)
  query.sub!(/<!--MAIN-->/,"<Attribute name = \"#{main}\" />")
  query.sub!(/<!--ATTRIBUTES-->/, attribute_xml)

  # NOTE(review): the query is appended to the URL without escaping;
  # presumably Open.read copes with that -- confirm.
  response = Open.read('http://www.biomart.org/biomart/martservice?query=' + query.gsub(/\n/,' '))
  raise BioMart::QueryError, response if response =~ /Query ERROR:/

  response.each_line do |line|
    key, *values = line.chomp.split(/\t/)
    next if key.nil? || key.empty?

    data[key] ||= {}
    # Columns come back in the order the attributes were requested.
    attrs.each_with_index do |name, column|
      data[key][name] ||= []
      data[key][name] << values[column] unless values[column].nil?
    end
  end

  data
end
62
-
63
- public
64
-
65
# Performs a BioMart query for a dataset and a set of attributes.
#
# +main+ is the attribute whose values become the keys of the result hash.
# +attrs+ is an optional list of additional attributes and +filters+ an
# optional list of filter names. If +filters+ is left as nil, a filter is
# added that drops rows where the main attribute is empty; this may cause
# an error if the BioMart WS does not allow filtering on that attribute.
#
# The extra attributes are requested two at a time (the BioMart WS only
# allows 3 external arguments per query) and each partial answer is merged
# into +data+, which callers should normally leave unspecified or nil.
# When no extra attributes are given, one request is still made so the
# values of the main attribute are returned.
#
# Returns a hash { main_value => { attribute => [values] } }; an attribute
# may have several values for the same main value.
def self.query(database, main, attrs = nil, filters = nil, data = nil)
  attrs ||= []
  data  ||= {}

  chunks = attrs.each_slice(2).to_a
  # Without extra attributes we still have to ask for the main one.
  chunks << [] if chunks.empty?

  chunks.each{|chunk|
    data = get(database, main, chunk, filters, data)
  }

  data
end
101
-
102
-
103
-
104
- end
105
-
@@ -1,211 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
3
- require 'rbbt/util/tmpfile'
4
- require 'rbbt/util/filecache'
5
- require 'rbbt/bow/bow.rb'
6
- require 'set'
7
-
8
-
9
- # This module is used to parse and extract information from the
10
- # gene_info file at Entrez Gene, as well as from the gene2pubmed file.
11
- # Both need to be downloaded and accessible to Rbbt, which is done as
12
- # part of a normal installation.
13
- module Entrez
14
-
15
- class NoFileError < StandardError; end
16
-
17
# Given a taxonomy, or set of taxonomies, returns an inverse hash where
# each key is the Entrez id of a gene and the value is an array of
# synonyms taken from another column of the gene_info file. Mostly used to
# translate Entrez ids to the native database id of the organism.
#
# taxs::   taxonomy id or array of ids (compared as strings).
# native:: position of the column holding the synonyms (default 5).
# fix::    optional Proc applied to every synonym before it is stored.
# check::  optional Proc; synonyms for which it returns false are skipped.
#
# Raises NoFileError when the gene_info file is not installed.
def self.entrez2native(taxs, native = nil, fix = nil, check = nil)
  gene_info = File.join(Rbbt.datadir, 'dbs/entrez/gene_info')
  raise NoFileError, "Install the Entrez gene_info file" unless File.exist? gene_info

  native ||= 5

  taxs = [taxs] unless taxs.is_a?(Array)
  taxs = taxs.collect{|t| t.to_s}

  lexicon = {}
  # The file is filtered here in Ruby, so no shell command is built from
  # the arguments and no temporary file is needed.
  File.open(gene_info) do |file|
    file.each_line do |l|
      line = l.chomp
      # Keep only rows whose first column is one of the taxonomies.
      next unless line =~ /\A(\S+)\s/ && taxs.include?($1)

      parts = line.split(/\t/)
      synonyms = parts[native]
      next if synonyms.nil? || synonyms == '-'

      entrez = parts[1]
      synonyms.split(/\|/).each{|id|
        id = fix.call(id) if fix
        next if check && !check.call(id)

        lexicon[entrez] ||= []
        lexicon[entrez] << id
      }
    end
  end

  lexicon
end
52
-
53
# For a given taxonomy, or set of taxonomies, returns a hash with genes as
# keys and arrays of related PubMed ids as values, as extracted from the
# gene2pubmed file from Entrez Gene.
#
# The rows for the requested taxonomies are copied to a temporary file,
# which is then parsed with Open.to_hash (column 1 as key, column 2 as
# value). The temporary file is removed even if parsing fails.
#
# Raises NoFileError when the gene2pubmed file is not installed.
def self.entrez2pubmed(taxs)
  gene2pubmed = File.join(Rbbt.datadir, 'dbs/entrez/gene2pubmed')
  raise NoFileError, "Install the Entrez gene2pubmed file" unless File.exist? gene2pubmed

  taxs = [taxs] unless taxs.is_a?(Array)
  taxs = taxs.collect{|t| t.to_s}

  tmp = TmpFile.tmp_file("entrez-")
  begin
    # Filter in Ruby so that no shell command is built from the arguments.
    File.open(tmp, 'w') do |out|
      File.open(gene2pubmed) do |file|
        file.each_line do |l|
          out.write(l) if l.chomp =~ /\A(\S+)\s/ && taxs.include?($1)
        end
      end
    end

    Open.to_hash(tmp, :native => 1, :extra => 2).each{|code, value_lists| value_lists.flatten!}
  ensure
    FileUtils.rm tmp if File.exist? tmp
  end
end
72
-
73
-
74
-
75
# Parses the XML for a particular gene as served by Entrez Gene and holds
# some of its information. Every attribute is the raw result of
# String#scan with one capture group, i.e. an array of one-element arrays;
# all attributes stay nil when the object is built from a nil XML.
class Gene
  attr_reader :organism, :symbol, :description, :aka, :protnames, :summary, :comentaries

  # xml:: String with the Entrezgene XML, or nil.
  def initialize(xml)
    return if xml.nil?

    @organism    = tag_contents(xml, 'Org-ref_taxname')
    @symbol      = tag_contents(xml, 'Gene-ref_locus')
    @description = tag_contents(xml, 'Gene-ref_desc')
    @aka         = tag_contents(xml, 'Gene-ref_syn_E')
    @protnames   = tag_contents(xml, 'Prot-ref_name_E')
    @summary     = tag_contents(xml, 'Entrezgene_summary')
    @comentaries = tag_contents(xml, 'Gene-commentary_text')
  end

  # Joins the text from symbol, description, aka, protnames, and summary.
  # Returns an empty string when nothing was parsed.
  def text
    [@symbol, @description, @aka, @protnames, @summary].flatten.compact.join(". ")
  end

  private

  # Contents of every <tag>...</tag> element in +xml+. The match is
  # non-greedy so consecutive elements are kept apart, and uses /m so
  # the content may span several lines.
  def tag_contents(xml, tag)
    name = Regexp.escape(tag)
    xml.scan(/<#{name}>(.*?)<\/#{name}>/m)
  end
end
101
-
102
- private
103
-
104
# Time of the last request and minimum number of seconds between requests.
@@last = Time.now
@@entrez_lag = 1

# Downloads the XML for one gene id, or for an array of ids, from the
# Entrez efetch service, bypassing the cache of Open.read. Waits, if
# needed, so that requests are at least @@entrez_lag seconds apart.
#
# Returns the XML of the first record for a single id, or a hash
# { requested_id => xml } for an array of ids. Each record is paired with
# the requested id equal to its own <Gene-track_geneid>; when the record
# does not report a requested id, the positional pairing is used only if
# the service returned exactly one record per requested id.
def self.get_online(geneids)
  geneids_list = ( geneids.is_a?(Array) ? geneids.join(',') : geneids.to_s )
  url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=gene&retmode=xml&id=#{geneids_list}"

  diff = Time.now - @@last
  sleep @@entrez_lag - diff unless diff > @@entrez_lag

  xml = Open.read(url, :quiet => true, :nocache => true)

  @@last = Time.now

  # /m lets '.' cross line breaks. The former /s flag is an encoding flag
  # in Ruby and made the match fail on non-ASCII UTF-8 text.
  genes = xml.scan(/(<Entrezgene>.*?<\/Entrezgene>)/m).flatten

  return genes.first unless geneids.is_a? Array

  requested = {}
  geneids.each{|id| requested[id.to_s] = id}
  same_size = genes.length == geneids.length

  list = {}
  genes.each_with_index{|gene, i|
    reported = gene[/<Gene-track_geneid>(.*?)<\/Gene-track_geneid>/, 1].to_s.strip
    if requested.key? reported
      list[requested[reported]] = gene
    elsif same_size
      list[geneids[i]] ||= gene
    end
  }
  list
end
133
-
134
- public
135
-
136
# Builds the cache file name for a gene: 'gene-' followed by the id and a
# '.xml' extension, passed through FileCache.clean_path.
def self.gene_filename(id)
  raw_name = 'gene-' + id.to_s + '.xml'
  FileCache.clean_path(raw_name)
end
141
-
142
# Returns a Gene object for the given Entrez Gene id, or nil for a nil id.
# If an array of ids is given instead, a hash { id => Gene } is returned
# (nil entries of the array are ignored).
#
# XML already stored by FileCache is reused; anything else is downloaded
# with get_online and added to the cache.
def self.get_gene(geneid)
  return nil if geneid.nil?

  unless Array === geneid
    filename = gene_filename geneid
    cached   = FileCache.path(filename)
    return Gene.new(Open.read(cached)) if File.exist? cached

    xml = get_online(geneid)
    FileCache.add_file(filename, xml)
    return Gene.new(xml)
  end

  list    = {}
  missing = []
  geneid.each{|id|
    next if id.nil?
    cached = FileCache.path(gene_filename(id))
    if File.exist? cached
      list[id] = Gene.new(Open.read(cached))
    else
      missing << id
    end
  }

  return list if missing.empty?

  # A single request fetches everything that was not cached.
  get_online(missing).each{|id, xml|
    filename = gene_filename id
    FileCache.add_file(filename, xml) unless File.exist? FileCache.path(filename)
    list[id] = Gene.new(xml)
  }

  list
end
187
-
188
# Measures the words in common between a chunk of text and the text found
# in Entrez Gene for a particular gene.
#
# gene:: a Gene instance, or a gene identifier (String or Integer) that is
#        resolved with get_gene. Any other kind of object scores 0.
# text:: the text to compare against.
#
# Returns the number of shared words divided by the sum of the number of
# distinct words on each side, or 0 when either side has no words.
def self.gene_text_similarity(gene, text)
  gene_text = case gene
              when Entrez::Gene
                gene.text
              when String, Integer # Integer also covers the old Fixnum
                get_gene(gene).text
              else
                return 0
              end

  gene_words = gene_text.words.to_set
  text_words = text.words.to_set

  return 0 if gene_words.empty? || text_words.empty?

  common = gene_words.intersection(text_words)
  common.length / (gene_words.length + text_words.length).to_f
end
211
- end
@@ -1,40 +0,0 @@
1
- require 'rbbt'
2
-
3
-
4
# This module holds helper methods to deal with the Gene Ontology files.
# Right now all it does is provide a translation from ids to names.
module GO
  # Hash { term id => { field => value } }, filled by GO.init.
  @@info = nil

  # Loads the gene_ontology.obo file found under Rbbt.datadir and stores
  # the fields of every [Term] stanza, although right now only the name
  # field is used. It is called automatically the first time id2name is
  # used. When a field appears several times in a term the last value
  # wins. Other stanzas (e.g. [Typedef]) and the file header are ignored
  # so that they cannot overwrite the fields of a term.
  def self.init
    @@info = {}
    text = File.read(File.join(Rbbt.datadir, 'dbs/go/gene_ontology.obo'))

    # Cut the file right before every "[Something]" stanza header.
    text.split(/^(?=\[\w+\]\s*$)/).each{|stanza|
      next unless stanza =~ /\A\[Term\]/

      term_info = {}
      stanza.split(/\n/).each{|l|
        fields = l.chomp.match(/(.*?):(.*)/)
        next if fields.nil?
        term_info[fields[1].strip] = fields[2].strip
      }
      @@info[term_info["id"]] = term_info if term_info["id"]
    }
    @@info
  end

  # Translates a GO id into the name of the term. For an array of ids it
  # returns an array of names with nil for unknown ids; for a single
  # unknown id it returns the string "Name not found".
  def self.id2name(id)
    self.init unless @@info
    if id.kind_of? Array
      @@info.values_at(*id).collect{|i| i['name'] if i}
    else
      return "Name not found" unless @@info[id]
      @@info[id]['name']
    end
  end
end
@@ -1,245 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
3
- require 'rbbt/util/index'
4
-
5
- # This module contains some Organism centric functionalities. Each organism is
6
- # identified by a keyword.
7
- module Organism
8
-
9
- # Raised when trying to access information for an organism that has not been
10
- # prepared already.
11
- class OrganismNotProcessedError < StandardError; end
12
-
13
# Lists the keywords of the supported organisms, i.e. the directory names
# found under the 'organisms' directory of Rbbt.datadir. With +prepared+
# set (the default) only directories containing an 'identifiers' file are
# listed.
def self.all(prepared = true)
  base = File.join(Rbbt.datadir,'/organisms/')

  unless prepared
    directories = Dir.glob(base + '/*').select{|path| File.directory? path}
    return directories.collect{|path| File.basename(path)}
  end

  Dir.glob(base + '/*/identifiers').collect{|path| File.basename(File.dirname(path))}
end
22
-
23
-
24
# Returns the complete name of an organism, as stored in the 'name' file
# of its directory. The +org+ parameter is the organism keyword.
#
# Raises OrganismNotProcessedError when the 'name' file is missing.
def self.name(org)
  name_file = File.join(Rbbt.datadir,"organisms/#{ org }/name")
  raise OrganismNotProcessedError, "Missing 'name' file" unless File.exist? name_file
  Open.read(name_file)
end
30
-
31
# Hash linking the long name of every prepared organism (stripped and in
# lower case) with its keyword in Rbbt. It is the inverse of the name
# method and is filled when this file is loaded.
NAME2ORG = {}
Organism::all.each do |keyword|
  long_name = Organism.name(keyword)
  NAME2ORG[long_name.strip.downcase] = keyword
end
38
-
39
-
40
# Returns the keyword associated with the long name of an organism, or
# nil if the name is not in NAME2ORG. Surrounding white space and case
# are ignored.
def self.name2org(name)
  normalized = name.strip.downcase
  NAME2ORG[normalized]
end
44
-
45
- # FIXME: The NER-related code is harder to install; that's why we hide the
46
- # requires next to where they are needed, next to options
47
-
48
# Returns a NER object of the Abner, Banner or NER (rner) class, selected
# with the +type+ parameter. Each library is only required when its type
# is requested.
#
# For :rner the model is options[:model] if given, otherwise the model
# file named after the organism if it exists, otherwise the 'BC2' model.
#
# Raises a RuntimeError for an unknown type.
def self.ner(org, type=:rner, options = {})
  kind = type.to_sym

  if kind == :abner
    require 'rbbt/ner/abner'
    return Abner.new
  end

  if kind == :banner
    require 'rbbt/ner/banner'
    return Banner.new
  end

  raise "Ner type (#{ type }) unknown" unless kind == :rner

  require 'rbbt/ner/rner'
  model = options[:model]
  organism_model = File.join(Rbbt.datadir,"ner/model/#{ org }")
  model ||= organism_model if File.exist? organism_model
  model ||= File.join(Rbbt.datadir,'ner/model/BC2')
  NER.new(model)
end
70
-
71
# Returns a Normalizer for the organism, built from its lexicon file.
#
# to_entrez:: index handed to the Normalizer as :to_entrez; when nil it is
#             built with id_index using 'Entrez Gene ID' as the native
#             format and the first supported format as the other one.
#
# The token configuration file norm/config/<org>.config is passed along
# only if it exists.
def self.norm(org, to_entrez = nil)
  require 'rbbt/ner/rnorm'
  if to_entrez.nil?
    to_entrez = id_index(org, :native => 'Entrez Gene ID', :other => [supported_ids(org).first])
  end

  token_file = File.join(Rbbt.datadir, 'norm','config',org.to_s + '.config')
  token_file = nil unless File.exist? token_file

  Normalizer.new(File.join(Rbbt.datadir,"organisms/#{ org }/lexicon"), :to_entrez => to_entrez, :file => token_file, :max_candidates => 20)
end
85
-
86
# Returns a hash with the names associated with each gene id, read from
# the 'lexicon' file of the organism with Open.to_hash. The ids are in
# Rbbt native format for that organism. The +options+ are merged over the
# defaults (:sep => "\t|\\|", :flatten => true).
def self.lexicon(org, options = {})
  defaults = {:sep => "\t|\\|", :flatten => true}
  lexicon_file = File.join(Rbbt.datadir,"organisms/#{ org }/lexicon")
  Open.to_hash(lexicon_file, defaults.merge(options))
end
92
-
93
# Returns a hash with the list of GO terms for each gene id, read from the
# tab-separated 'gene.go' file of the organism. Gene ids are in Rbbt
# native format for that organism. Lines without both a gene and a GO
# term (e.g. blank lines) are skipped.
def self.goterms(org)
  goterms = {}
  Open.read(File.join(Rbbt.datadir,"organisms/#{ org }/gene.go")).each_line{|l|
    gene, go = l.chomp.split(/\t/)
    next if gene.nil? || go.nil?

    goterms[gene.strip] ||= []
    goterms[gene.strip] << go.strip
  }
  goterms
end
104
-
105
# Returns the list of PubMed ids associated with the organism: every run
# of digits found in its 'all.pmid' file.
def self.literature(org)
  pmid_file = File.join(Rbbt.datadir,"organisms/#{ org }/all.pmid")
  contents = Open.read(pmid_file)
  contents.scan(/\d+/)
end
110
-
111
# Returns a hash that associates genes with a list of PubMed ids, read
# from the 'gene.pmid' file of the organism.
def self.gene_literature(org)
  pmid_file = File.join(Rbbt.datadir,"organisms/#{ org }/gene.pmid")
  Open.to_hash(pmid_file, :flatten => true)
end
115
-
116
# Returns a hash that associates genes with a list of PubMed ids, read
# from the 'gene_go.pmid' file of the organism. Includes only those found
# to support GO term associations.
def self.gene_literature_go(org)
  pmid_file = File.join(Rbbt.datadir,"organisms/#{ org }/gene_go.pmid")
  Open.to_hash(pmid_file, :flatten => true)
end
121
-
122
# Returns a list with the names of the id formats supported for an
# organism, taken from the first line of its 'identifiers' file when that
# line starts with '#'. Without such a header the list is empty.
#
# *Options:*
#
# *examples:* Return [format, example] pairs instead. The examples are
# taken from the row with the most non-empty fields, using the first of
# the '|'-separated ids of each field.
def self.supported_ids(org, options = {})
  with_examples = options[:examples]
  formats  = []
  examples = []
  filled   = 0 # non-empty fields in the row the examples come from
  first_line = true

  Open.read(File.join(Rbbt.datadir,"organisms/#{ org }/identifiers")).each_line{|l|
    if first_line
      first_line = false
      next unless l=~/^\s*#/
      formats = Open.fields(l.sub(/^[\s#]+/,'')).collect{|n| n.strip}
      return formats unless with_examples
      next
    end

    # Data rows are only needed to pick examples.
    break unless with_examples

    fields = Open.fields(l)
    count  = fields.select{|name| name && name =~ /\w/}.length
    if count > filled
      filled   = count
      examples = fields.collect{|name| name.to_s.split(/\|/).first}
    end
  }

  return formats unless with_examples
  formats.zip(examples)
end
149
-
150
# Creates a hash where each possible id (in lower case) is associated with
# the names of the formats it appears under in the 'identifiers' file of
# the organism (it is potentially possible for different formats to have
# the same id). This is used in the guessIdFormat method.
#
# A first line starting with '#' is the header naming the formats and is
# not indexed.
def self.id_formats(org)
  id_types = {}
  formats  = supported_ids(org)
  first_line = true

  Open.read(File.join(Rbbt.datadir,"organisms/#{ org }/identifiers")).each_line{|l|
    header = first_line && l =~ /^\s*#/
    first_line = false
    next if header

    # Pair every format with its column; short rows give nil columns.
    formats.zip(Open.fields(l)).each{|format, ids|
      (ids || "").split(/\|/).each{|id|
        next if id.nil? || id == ""
        key = id.downcase
        id_types[key] ||= []
        id_types[key] << format unless id_types[key].include? format
      }
    }
  }

  id_types
end
181
-
182
# Guesses which id format a list of identifiers is written in.
#
# formats:: hash { id => [format names] } as produced by id_formats, or an
#           organism keyword (String) for which that hash is built.
# query::   array of identifiers; nil entries, case and repetitions are
#           ignored.
#
# Returns the [format, count] pair with the highest count among the
# formats matched by more than a tenth (integer division) of the distinct
# query ids, or nil when there is nothing to choose from.
def self.guessIdFormat(formats, query)
  query = query.compact.collect{|gene| gene.downcase}.uniq
  formats = id_formats(formats) if String === formats

  return nil if formats.values.empty?
  matches = formats.values_at(*query)
  return nil if matches.empty?

  # Count, per format, how many query ids belong to it.
  format_count = {}
  matches.compact.each do |types|
    types.uniq.each do |format|
      format_count[format] = (format_count[format] || 0) + 1
    end
  end

  return nil if format_count.values.empty?

  threshold = query.length / 10
  frequent = format_count.select{|format, count| count > threshold}
  frequent.sort{|a,b| b[1] <=> a[1]}.first
end
201
-
202
# Returns the position of the format called +id_name+ in the list of
# +supported_ids+. Names are compared after stripping white space and,
# unless options[:case_sensitive] is set, ignoring case as well. When
# several entries match the last one wins; when none does the result is 0.
def self.id_position(supported_ids, id_name, options = {})
  found = 0
  supported_ids.each_with_index do |candidate, index|
    same_name = candidate.strip == id_name.strip
    same_name ||= !options[:case_sensitive] && candidate.strip.downcase == id_name.strip.downcase
    found = index if same_name
  end
  found
end
211
-
212
# Builds an index over the 'identifiers' file of the organism with
# Index.index.
#
# *Options* (the hash given by the caller is not modified):
#
# *native:* name of the format used as the native column; the first
# column when missing.
# *other:* names of the formats used as extra columns; every column but
# the native one when missing.
# *case_sensitive:* defaults to false; it is also used to match the format
# names.
#
# With neither :native nor :other the options are passed to Index.index
# as they are. Otherwise the format names are translated to column
# positions with id_position and passed as :native and :extra.
def self.id_index(org, option = {})
  option = option.dup # keep the caller's hash untouched
  native = option[:native]
  other  = option[:other]
  option[:case_sensitive] = false if option[:case_sensitive].nil?

  identifiers = File.join(Rbbt.datadir,"organisms/#{ org }/identifiers")
  return Index.index(identifiers, option) if native.nil? && other.nil?

  supported = supported_ids(org)

  first = native ? id_position(supported, native, option) : 0
  rest = if other
           other.collect{|name| id_position(supported, name, option)}
         else
           (0..supported.length - 1).to_a - [first]
         end

  option[:native] = first
  option[:extra] = rest
  Index.index(identifiers, option)
end
243
-
244
- end
245
-