bio-exominer 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +14 -0
- data/Gemfile +17 -0
- data/LICENSE.txt +20 -0
- data/README.md +413 -0
- data/Rakefile +58 -0
- data/VERSION +1 -0
- data/bin/exominer +250 -0
- data/bin/hugo_exominer_symbols +74 -0
- data/bin/ncbi_exominer_symbols +79 -0
- data/bin/pack_exominer_symbols +38 -0
- data/features/bio-exominer.feature +9 -0
- data/features/step_definitions/bio-exominer_steps.rb +0 -0
- data/features/support/env.rb +13 -0
- data/lib/bio-exominer.rb +14 -0
- data/lib/bio-exominer/exominer.rb +3 -0
- data/lib/bio-exominer/rdf.rb +38 -0
- data/lib/bio-exominer/symbols.rb +49 -0
- data/lib/bio-exominer/textparser.rb +124 -0
- data/scripts/4store.sh +30 -0
- data/scripts/example.sh +9 -0
- data/scripts/example_rdf.sh +7 -0
- data/scripts/load_rdf.sh +15 -0
- data/spec/bio-exominer_spec.rb +8 -0
- data/spec/rdf_spec.rb +28 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/text_parser_spec.rb +59 -0
- data/test/data/input/hugo_symbols +38106 -0
- metadata +195 -0
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# encoding: utf-8

# Rakefile for the bio-exominer gem: gem packaging through Jeweler, the
# test task, Cucumber features and RDoc generation.

require 'rubygems'
require 'bundler'
begin
  # Activate the :default and :development gem groups before loading them
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bio-exominer"
  gem.homepage = "http://github.com/pjotrp/bioruby-exominer"
  gem.license = "MIT"
  gem.summary = %Q{Mine publications for gene names}
  gem.description = %Q{Parse publications for gene names in a fuzzy fashion}
  gem.email = "pjotr.public01@thebird.nl"
  gem.authors = ["Pjotr Prins"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# require 'rspec/core'
# require 'rspec/core/rake_task'
# RSpec::Core::RakeTask.new(:spec) do |spec|
#   spec.pattern = FileList['spec/**/*_spec.rb']
# end

# RSpec::Core::RakeTask.new(:rcov) do |spec|
#   spec.pattern = 'spec/**/*_spec.rb'
#   spec.rcov = true
# end

require 'rake/testtask'

# The specs are run through the plain Rake test task
Rake::TestTask.new do |t|
  t.pattern = "spec/*_spec.rb"
end

require 'cucumber/rake/task'
Cucumber::Rake::Task.new(:features)

task :default => :test

require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up
  # inside the generated documentation title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-exominer #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.3
|
data/bin/exominer
ADDED
@@ -0,0 +1,250 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# BioRuby exominer Plugin BioExominer
|
4
|
+
# Author:: Pjotr Prins
|
5
|
+
#
|
6
|
+
# Copyright (C) 2013,2014 Cuppen Group & Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
|
8
|
+
USAGE = "exominer takes a symbol file and parses the piped data for gene symbols"

# Root directory of the gem (the parent of bin/); its lib/ directory is put
# on the load path so the script also runs from a source checkout
gempath = File.dirname(File.dirname(__FILE__))
$: << File.join(gempath,'lib')

VERSION_FILENAME=File.join(gempath,'VERSION')
# File.read opens, reads and closes the file in one call (no leaked handle)
version = File.read(VERSION_FILENAME).chomp

# print banner
$stderr.print "exominer #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n"

# Without arguments show a usage hint. It goes to stderr, newline terminated,
# because stdout carries the result listing / RDF and must stay clean.
$stderr.puts USAGE if ARGV.empty?
|
22
|
+
|
23
|
+
require 'bio-exominer'
|
24
|
+
require 'optparse'
|
25
|
+
|
26
|
+
# Uncomment when using the bio-logger
|
27
|
+
# require 'bio-logger'
|
28
|
+
# log = Bio::Log::LoggerPlus.new 'exominer'
|
29
|
+
# log.outputters = Bio::Log::Outputter.stderr
|
30
|
+
# Bio::Log::CLI.logger('stderr')
|
31
|
+
# Bio::Log::CLI.trace('info')
|
32
|
+
|
33
|
+
# Parsed command line settings. :symbols collects every file passed with
# -s/--symbols; all other keys are only present when the switch was used.
options = { show_help: false, symbols: []}
opts = OptionParser.new do |o|
  o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g. #{File.basename($0)} -s ncbi_symbols.tab --rdf < test.txt"

  o.on("--rdf", "Generate RDF") do |b|
    options[:rdf] = true
  end
  o.on("--name name", 'Set name of result set') do |name|
    options[:name] = name
  end
  o.on("-s","--symbols fn", 'Symbol file') do |fn|
    options[:symbols] << fn
  end
  o.on("--hugo [fn]", 'Hugo symbol file') do |fn|
    # Without a file name fall back to the symbol list shipped with the gem
    options[:hugo] = fn || (gempath + '/test/data/input/hugo_symbols')
  end
  o.on("-i","--ignore fn", 'Ignore symbols in fn (NYI)') do |fn|
    options[:ignore] = fn
  end
  o.on("--context [TYPE]",[:off,:line], 'Context parser mode (off,line)') do |context|
    options[:context] = context
  end
  o.on("--doi doi", 'DOI') do |doi|
    options[:doi] = doi
  end
  # Tags are given as "key=value; key2=value2" and stored as a Hash with
  # symbol keys in options[:tags]
  o.on("--tag string", 'Tag string') do |tag|
    tags = {}
    tag.split(/\s?;\s?/).each do |field|
      field = field.strip
      next if field.empty?                    # stray ';' yields empty fields
      # Split on the first '=' only, so values may themselves contain '='
      key,value = field.split(/\s?=\s?/, 2)
      next if key.nil? or key.empty?
      tags[key.to_sym] = value
    end
    # Merge, so a repeated --tag adds to earlier tags instead of replacing them
    (options[:tags] ||= {}).merge!(tags)
  end

  # Uncomment the following when using the bio-logger
  # o.separator ""
  # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
  #   Bio::Log::CLI.logger(name)
  # end
  #
  # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
  #   Bio::Log::CLI.trace(s)
  # end
  #
  # o.on("-q", "--quiet", "Run quietly") do |q|
  #   Bio::Log::CLI.trace('error')
  # end
  #
  # o.on("-v", "--verbose", "Run verbosely") do |v|
  #   Bio::Log::CLI.trace('info')
  # end
  #
  # o.on("--debug", "Show debug messages") do |v|
  #   Bio::Log::CLI.trace('debug')
  # end

  o.separator ""
  o.on_tail('-h', '--help', 'display this help and exit') do
    options[:show_help] = true
  end
end
|
99
|
+
|
100
|
+
require 'bio-exominer/textparser'
|
101
|
+
require 'bio-exominer/symbols'
|
102
|
+
require 'yaml'
|
103
|
+
|
104
|
+
include BioExominer
|
105
|
+
|
106
|
+
# Main program: parse the command line, tokenize the text read through ARGF,
# match the tokens against the symbol files and write the matches either as
# RDF (--rdf) or as tab delimited text.
begin
  opts.parse!(ARGV)

  if options[:show_help]
    print opts
    exit 1
  end

  $stderr.print options

  # Tags are optional; work on an empty Hash when no --tag was given
  tags = options[:tags] || {}

  # Create unique name for the resultset. Precedence: --name, name tag, DOI
  # and finally a random ID, if nothing else works. The name is kept as a
  # String because it is concatenated into the RDF identifiers below.
  doi = options[:doi]
  name = options[:name]
  name ||= tags[:name]
  name ||= doi
  name ||= rand(36**8).to_s(36)
  name = name.to_s

  # The context mode may be overridden by a context=... tag. The --context
  # switch yields a Symbol, so the tag value is converted to match.
  # NOTE(review): assumes the tokenizer accepts the same values for both.
  options[:context] = tags[:context].to_sym if tags[:context]
  give_context = options[:context] != :off

  # Uncomment when using the bio-logger
  # Bio::Log::CLI.configure('exominer')
  # logger = Bio::Log::LoggerPlus['exominer']
  # Log parsed options and remaining arguments in ARGV
  # logger.info [options, ARGV]

  $stderr.print "\nLoading text..."
  buf = ARGF.read
  $stderr.print "\nTokenizing..."
  tokens,context =
    TextParser::tokenize_with_context(buf,options[:context])

  hugo_matches = {}
  symbol_matches = {} # match symbols
  alias_matches = {}  # match aliases
  info = {}           # the main symbol match tracker
  hugo = {}           # HUGO tracker

  $stderr.print "\nParse symbol files..."
  parse_symbols = lambda { |symbolfn,is_hugo=false|
    # ---- for every symbol file
    $stderr.print "\nParse symbol file #{symbolfn}..."
    Symbols::each(symbolfn) do | symbol,aliases,descr |
      # ---- for every symbol and aliases
      # $stderr.print "\nHUGO-"+symbol if symbol =~ /L3MBTL/
      hugo[symbol] = true if is_hugo
      # alias_count += aliases.size if aliases
      # ---- If the symbol has a match, and it is not in the list, add it
      if tokens[symbol] and not info[symbol]
        symbol_matches[symbol] = tokens[symbol]
        hugo_matches[symbol] ||= true if is_hugo
        info[symbol] = { :symbol => symbol, :is_hugo=>is_hugo, :aliases => aliases, :descr => descr, :symbolfn => symbolfn }
      end
      # ---- If an alias has a match and is not in the list, add it
      if aliases
        aliases.each do | word |
          # $stderr.print "\n!!"+word+':'+symbol if word == "L3MBTL"
          # full = word + ' (' + symbol + ')'
          if tokens[word] and not info[word]
            # $stderr.print "\n=="+word+':'+symbol if word == "L3MBTL"
            hugo_matches[word] ||= true if hugo[word]
            alias_matches[word] = tokens[word]
            info[word] = { :symbol => symbol, :is_hugo=>hugo[word], :aliases => aliases, :descr => descr, :symbolfn => symbolfn }
          end
        end
      end
    end
  }

  # The HUGO file goes first, so its symbols take precedence in info
  parse_symbols.call(options[:hugo],true) if options[:hugo]
  options[:symbols].each { |fn| parse_symbols.call(fn) }

  if options[:rdf]
    # Write RDF!
    print <<HEADER

# RDF output by bio-exominer https://github.com/pjotrp/bioruby-exominer
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix hgnc: <http://identifiers.org/hgnc.symbol/> .
@prefix doi: <http://dx.doi.org/> .
@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix ncbigene: <https://www.google.nl/search?q=ncbi+gene+alias+> .
@prefix : <http://biobeat.org/rdf/exominer/ns#> .

HEADER

    # Fix DOI with http://www.doi.org/doi_handbook/2_Numbering.html#2.6.2
    print ":#{name} :doi \"doi:#{doi}\" . \n" if doi
    print ":#{name} rdf:label \"#{name}\" . \n"
    print ":#{name} a \"text resource\" . \n"
    print ":#{name} dc:type \"#{options[:type]}\" . \n" if options[:type]
    if options[:tags]
      options[:tags].each do | k,v |
        print ":#{name} :#{k.to_sym} \"#{v}\" .\n"
      end
    end
    print "\n"

    # Emit the statements for one matched word (a symbol or an alias).
    # NOTE(review): descriptions and tag values are written unescaped, so a
    # double quote inside them would produce an invalid literal.
    write_symbol_rdf = lambda { |symbol,freq,is_alias=false|
      match_info = info[symbol]
      symbol1 = match_info[:symbol]
      symboluri = Symbols::uri(symbol1,hugo)
      symbolidentifier = RDF::make_identifier(symbol1)
      symbol1 = symbolidentifier if not hugo[symbol1]
      symbolref = name + '_' + symbol1
      print ":#{symbolref} a #{symboluri} .\n"
      # print ":#{symbolref} :alias #{Symbols::uri(symbol,hugo)} .\n" if symbol1 != symbol
      print ":#{symbolref} :textmatch \"#{symbol}\" .\n"
      print ":#{symbolref} dc:partOf :#{name} .\n"
      print ":#{symbolref} :frequency #{freq} .\n"
      if give_context
        # Only touch the context table when it is actually written; a word
        # without recorded context yields an empty string
        match_context = Array(context[symbol]).join("; ")
        print ":#{symbolref} :context \"#{match_context.encode("UTF-8").encode(:xml => :text)}\" .\n"
      end
      print "#{symboluri} rdf:label \"#{symboluri}\" .\n"
      print "#{symboluri} rdf:comment \"#{match_info[:descr]}\" .\n"
      print "\n"
    }

    # Aliases first, skipping words that also matched as a proper symbol
    alias_matches.each do | symbol, freq |
      write_symbol_rdf.call(symbol,freq,true) if not symbol_matches[symbol]
    end
    symbol_matches.each do | symbol, freq |
      write_symbol_rdf.call(symbol,freq,false)
    end
  else
    print "\nMatching symbol aliases:\n"
    alias_matches.sort_by{|k,v| v}.each do | k,v |
      print v,"\t",(hugo[info[k][:symbol]] ? " HUGO" : ""),"\t",k,"\t",info[k][:symbol],"\t",info[k][:descr],"\n" if not symbol_matches[k]
    end
    print "\nMatching symbols:\n"
    symbol_matches.sort_by{|k,v| v}.each do | k,v |
      print v,"\t",(hugo[k] ? " HUGO" : ""),"\t",k,"\t",info[k][:descr],"\n"
    end
  end
  # $stderr.print "\nA total of #{symbol_count} symbols and #{alias_count} aliases scanned."
  $stderr.print "\nThere were #{hugo_matches.size} HUGO matches out of #{hugo.size} symbols."
  $stderr.print "\nDone!\n"
rescue OptionParser::ParseError => e
  # Covers unknown switches as well as missing or invalid arguments. Report
  # the problem instead of silently exiting with success.
  options[:invalid_argument] = e.message
  $stderr.puts e.message
  $stderr.puts opts
  exit 1
end
|
250
|
+
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Convert HUGO gene_info to symbol file and calculate used letter
|
4
|
+
# frequencies. Note: all symbols that are numbers are removed.
|
5
|
+
#
|
6
|
+
# Copyright (C) 2013 Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'yaml'
|
10
|
+
|
11
|
+
# Returns true or false
|
12
|
+
# Decide whether +s+ is usable as a gene symbol.
#
# Returns false for nil, for blank strings and for strings that consist of
# digits only; returns true for everything else. A symbol containing
# whitespace is still accepted, but a warning is written to stderr.
def valid_symbol(s)
  return false if s.nil?                  # column missing on a short line
  s = s.strip
  return false if s.empty?
  return false if s =~ /\A\d+\z/          # drop all digit id's
  $stderr.print "Symbol contains a space! <"+s+">\n" if s =~ /\s/
  true
end
|
18
|
+
|
19
|
+
# Helpers for reporting character frequency tables.
module Freq
  # Render +freq+ (a Hash of character => count) as its YAML dump, followed
  # by an empty line and all keys sorted and joined into a single string.
  def self.to_s(freq)
    freq.to_yaml + "\n" + freq.keys.sort.join
  end
end
|
28
|
+
|
29
|
+
# Character => number of occurrences over all emitted symbols and aliases
freq = {}

counter = 0

# Read the tab delimited input line by line and print one
# "symbol<TAB>aliases<TAB>description" record per accepted symbol.
ARGF.each_line do | line |
  counter += 1
  $stderr.print "." if counter % 10_000 == 0   # progress indicator

  next if line =~ /^HGNC ID/                   # skip the header line
  a = line.strip.split(/\t/)
  symbol = a[1]
  # Blank or truncated lines have no symbol column; skip them
  next if symbol.nil?
  next if not valid_symbol(symbol)
  # Columns 4 and 6 hold comma separated lists; both are merged into the
  # alias list (column 6 entries first)
  oldnames = a.size > 4 ? a[4].strip.split(/\s?,\s?/) : []
  synonyms = a.size > 6 ? a[6].strip.split(/\s?,\s?/) : []
  as = (synonyms + oldnames).reject { |c| c.empty? }
  aliases = as.empty? ? 'NA' : as.uniq.join('|')
  descr = a[2]
  descr = '' if descr == '-'
  print symbol,"\t",aliases,"\t",descr,"\n"
  # Add stats
  cs = symbol.scan(/./)
  cs += aliases.scan(/./) - ['|'] if aliases != 'NA'
  cs.each do |c|
    freq[c] = (freq[c] || 0) + 1
  end
end

# The frequency report is written next to the output, in the working directory
File.open('hugo_exominer_symbols.freq','w') do |f|
  f.print(Freq::to_s(freq))
end
|
73
|
+
|
74
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Convert NCBI gene_info to symbol file and calculate used letter
|
4
|
+
# frequencies. Note: all symbols that are numbers are removed.
|
5
|
+
#
|
6
|
+
# Copyright (C) 2013 Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'yaml'
|
10
|
+
|
11
|
+
# Returns true or false
|
12
|
+
# Decide whether +s+ is usable as a gene symbol.
#
# Returns false for nil, for blank strings and for strings that consist of
# digits only; returns true for everything else. A symbol containing
# whitespace is still accepted, but a warning is written to stderr.
def valid_symbol(s)
  return false if s.nil?                  # column missing on a short line
  s = s.strip
  return false if s.empty?
  return false if s =~ /\A\d+\z/          # drop all digit id's
  $stderr.print "Symbol contains a space! <"+s+">\n" if s =~ /\s/
  true
end
|
18
|
+
|
19
|
+
# Helpers for reporting character frequency tables.
module Freq
  # Render +freq+ (a Hash of character => count) as its YAML dump, followed
  # by an empty line and all keys sorted and joined into a single string.
  def self.to_s(freq)
    freq.to_yaml + "\n" + freq.keys.sort.join
  end
end
|
28
|
+
|
29
|
+
# Character => number of occurrences over all emitted symbols and aliases
freq = {}

counter = 0

# Read the tab delimited input line by line and print one
# "symbol<TAB>aliases<TAB>description" record per accepted symbol.
ARGF.each_line do | line |
  counter += 1
  $stderr.print "." if counter % 100_000 == 0  # progress indicator

  next if line =~ /^#/                         # skip comment/header lines
  a = line.strip.split(/\t/)
  symbol = a[2]
  # Blank or truncated lines have no symbol column; skip them
  next if symbol.nil?
  next if symbol == 'NEWENTRY' or symbol == '-'
  # Skip gene names that are numbers only
  next if not valid_symbol(symbol)
  name = a[3]
  aliases = a[4] || '-'                        # a missing column counts as "none"
  # Column 3 is added to the aliases when it is a usable, distinct name
  if name and name != '-' and name != symbol and valid_symbol(name)
    if aliases == '-'
      aliases = name
    else
      aliases += '|'+name
    end
  end
  aliases =
    if aliases == '-'
      'NA'
    else
      # Skip gene names that are numbers only
      kept = aliases.split(/\|/).select { |word| valid_symbol(word) }.uniq
      # When every alias was filtered out report 'NA' rather than an empty column
      kept.empty? ? 'NA' : kept.join('|')
    end
  descr = a[8]
  descr = '' if descr == '-'
  print symbol,"\t",aliases,"\t",descr,"\n"
  # Add stats
  cs = symbol.scan(/./)
  cs += aliases.scan(/./) - ['|'] if aliases != 'NA'
  cs.each do |c|
    freq[c] = (freq[c] || 0) + 1
  end
end

# The frequency report is written next to the output, in the working directory
File.open('ncbi_exominer_symbols.freq','w') do |f|
  f.print(Freq::to_s(freq))
end
|
78
|
+
|
79
|
+
|