bio-exominer 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +1 -0
- data/.travis.yml +14 -0
- data/Gemfile +17 -0
- data/LICENSE.txt +20 -0
- data/README.md +413 -0
- data/Rakefile +58 -0
- data/VERSION +1 -0
- data/bin/exominer +250 -0
- data/bin/hugo_exominer_symbols +74 -0
- data/bin/ncbi_exominer_symbols +79 -0
- data/bin/pack_exominer_symbols +38 -0
- data/features/bio-exominer.feature +9 -0
- data/features/step_definitions/bio-exominer_steps.rb +0 -0
- data/features/support/env.rb +13 -0
- data/lib/bio-exominer.rb +14 -0
- data/lib/bio-exominer/exominer.rb +3 -0
- data/lib/bio-exominer/rdf.rb +38 -0
- data/lib/bio-exominer/symbols.rb +49 -0
- data/lib/bio-exominer/textparser.rb +124 -0
- data/scripts/4store.sh +30 -0
- data/scripts/example.sh +9 -0
- data/scripts/example_rdf.sh +7 -0
- data/scripts/load_rdf.sh +15 -0
- data/spec/bio-exominer_spec.rb +8 -0
- data/spec/rdf_spec.rb +28 -0
- data/spec/spec_helper.rb +19 -0
- data/spec/text_parser_spec.rb +59 -0
- data/test/data/input/hugo_symbols +38106 -0
- metadata +195 -0
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "bio-exominer"
|
18
|
+
gem.homepage = "http://github.com/pjotrp/bioruby-exominer"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = %Q{Mine publications for gene names}
|
21
|
+
gem.description = %Q{Parse publications for gene names in a fuzzy fashion}
|
22
|
+
gem.email = "pjotr.public01@thebird.nl"
|
23
|
+
gem.authors = ["Pjotr Prins"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
# require 'rspec/core'
|
29
|
+
# require 'rspec/core/rake_task'
|
30
|
+
# RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
# spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
# end
|
33
|
+
|
34
|
+
# RSpec::Core::RakeTask.new(:rcov) do |spec|
|
35
|
+
# spec.pattern = 'spec/**/*_spec.rb'
|
36
|
+
# spec.rcov = true
|
37
|
+
# end
|
38
|
+
|
39
|
+
require 'rake/testtask'
|
40
|
+
|
41
|
+
Rake::TestTask.new do |t|
|
42
|
+
t.pattern = "spec/*_spec.rb"
|
43
|
+
end
|
44
|
+
|
45
|
+
require 'cucumber/rake/task'
|
46
|
+
Cucumber::Rake::Task.new(:features)
|
47
|
+
|
48
|
+
task :default => :test
|
49
|
+
|
50
|
+
require 'rdoc/task'
|
51
|
+
Rake::RDocTask.new do |rdoc|
|
52
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
53
|
+
|
54
|
+
rdoc.rdoc_dir = 'rdoc'
|
55
|
+
rdoc.title = "bio-exominer #{version}"
|
56
|
+
rdoc.rdoc_files.include('README*')
|
57
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
58
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.3
|
data/bin/exominer
ADDED
@@ -0,0 +1,250 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# BioRuby exominer Plugin BioExominer
|
4
|
+
# Author:: Pjotr Prins
|
5
|
+
#
|
6
|
+
# Copyright (C) 2013,2014 Cuppen Group & Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
|
8
|
+
USAGE = "exominer takes a symbol file and parses the piped data for gene symbols"
|
9
|
+
|
10
|
+
gempath = File.dirname(File.dirname(__FILE__))
|
11
|
+
$: << File.join(gempath,'lib')
|
12
|
+
|
13
|
+
VERSION_FILENAME=File.join(gempath,'VERSION')
|
14
|
+
version = File.new(VERSION_FILENAME).read.chomp
|
15
|
+
|
16
|
+
# print banner
|
17
|
+
$stderr.print "exominer #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n"
|
18
|
+
|
19
|
+
if ARGV.size == 0
|
20
|
+
print USAGE
|
21
|
+
end
|
22
|
+
|
23
|
+
require 'bio-exominer'
|
24
|
+
require 'optparse'
|
25
|
+
|
26
|
+
# Uncomment when using the bio-logger
|
27
|
+
# require 'bio-logger'
|
28
|
+
# log = Bio::Log::LoggerPlus.new 'exominer'
|
29
|
+
# log.outputters = Bio::Log::Outputter.stderr
|
30
|
+
# Bio::Log::CLI.logger('stderr')
|
31
|
+
# Bio::Log::CLI.trace('info')
|
32
|
+
|
33
|
+
options = { show_help: false, symbols: []}
|
34
|
+
opts = OptionParser.new do |o|
|
35
|
+
o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g. #{File.basename($0)} -s ncbi_symbols.tab --rdf < test.txt"
|
36
|
+
|
37
|
+
o.on("--rdf", "Generate RDF") do |b|
|
38
|
+
options[:rdf] = true
|
39
|
+
end
|
40
|
+
o.on("--name name", 'Set name of result set') do |name|
|
41
|
+
options[:name] = name
|
42
|
+
end
|
43
|
+
o.on("-s","--symbols fn", 'Symbol file') do |fn|
|
44
|
+
options[:symbols] += [fn]
|
45
|
+
end
|
46
|
+
o.on("--hugo [fn]", 'Hugo symbol file') do |fn|
|
47
|
+
if fn
|
48
|
+
options[:hugo] = fn
|
49
|
+
else
|
50
|
+
options[:hugo] = gempath + '/test/data/input/hugo_symbols'
|
51
|
+
end
|
52
|
+
end
|
53
|
+
o.on("-i","--ignore fn", 'Ignore symbols in fn (NYI)') do |fn|
|
54
|
+
options[:ignore] = fn
|
55
|
+
end
|
56
|
+
o.on("--context [TYPE]",[:off,:line], 'Context parser mode (off,line)') do |context|
|
57
|
+
options[:context] = context
|
58
|
+
end
|
59
|
+
o.on("--doi doi", 'DOI') do |doi|
|
60
|
+
options[:doi] = doi
|
61
|
+
end
|
62
|
+
o.on("--tag string", 'Tag string') do |tag|
|
63
|
+
tags = {}
|
64
|
+
ts = tag.split(/\s?;\s?/)
|
65
|
+
ts.each do |field|
|
66
|
+
key,value = field.strip.split(/\s?=\s?/)
|
67
|
+
tags[key.to_sym] = value
|
68
|
+
end
|
69
|
+
options[:tags] = tags
|
70
|
+
end
|
71
|
+
|
72
|
+
# Uncomment the following when using the bio-logger
|
73
|
+
# o.separator ""
|
74
|
+
# o.on("--logger filename",String,"Log to file (default stderr)") do | name |
|
75
|
+
# Bio::Log::CLI.logger(name)
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
|
79
|
+
# Bio::Log::CLI.trace(s)
|
80
|
+
# end
|
81
|
+
#
|
82
|
+
# o.on("-q", "--quiet", "Run quietly") do |q|
|
83
|
+
# Bio::Log::CLI.trace('error')
|
84
|
+
# end
|
85
|
+
#
|
86
|
+
# o.on("-v", "--verbose", "Run verbosely") do |v|
|
87
|
+
# Bio::Log::CLI.trace('info')
|
88
|
+
# end
|
89
|
+
#
|
90
|
+
# o.on("--debug", "Show debug messages") do |v|
|
91
|
+
# Bio::Log::CLI.trace('debug')
|
92
|
+
# end
|
93
|
+
|
94
|
+
o.separator ""
|
95
|
+
o.on_tail('-h', '--help', 'display this help and exit') do
|
96
|
+
options[:show_help] = true
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
require 'bio-exominer/textparser'
|
101
|
+
require 'bio-exominer/symbols'
|
102
|
+
require 'yaml'
|
103
|
+
|
104
|
+
include BioExominer
|
105
|
+
|
106
|
+
begin
|
107
|
+
opts.parse!(ARGV)
|
108
|
+
|
109
|
+
if options[:show_help]
|
110
|
+
print opts
|
111
|
+
exit 1
|
112
|
+
end
|
113
|
+
|
114
|
+
$stderr.print options
|
115
|
+
|
116
|
+
# Create unique name for the resultset
|
117
|
+
doi = options[:doi]
|
118
|
+
name = options[:name]
|
119
|
+
# Fall back to the name given in the tags. Keep it a String: the RDF writer
# later builds identifiers with `name + '_' + symbol`, which a Symbol does not support.
name = options[:tags][:name] if not name and options[:tags] and options[:tags][:name]
|
120
|
+
name = doi if not name and doi
|
121
|
+
name = rand(36**8).to_s(36) if not name # finally a random ID, if nothing else works
|
122
|
+
give_context = options[:context] != :off
|
123
|
+
|
124
|
+
# context may be overridden by tags
|
125
|
+
# options[:tags] is only set when --tag was passed, so guard against nil before indexing
options[:context] = options[:tags][:context] if options[:tags] and options[:tags][:context]
|
126
|
+
|
127
|
+
# Uncomment when using the bio-logger
|
128
|
+
# Bio::Log::CLI.configure('exominer')
|
129
|
+
# logger = Bio::Log::LoggerPlus['exominer']
|
130
|
+
# Log parsed options and remaining arguments in ARGV
|
131
|
+
# logger.info [options, ARGV]
|
132
|
+
|
133
|
+
$stderr.print "\nLoading text..."
|
134
|
+
buf = ARGF.read
|
135
|
+
$stderr.print "\nTokenizing..."
|
136
|
+
tokens,context =
|
137
|
+
TextParser::tokenize_with_context(buf,options[:context])
|
138
|
+
|
139
|
+
symbol_count = 0
|
140
|
+
alias_count = 0
|
141
|
+
hugo_count = 0
|
142
|
+
hugo_matches = {}
|
143
|
+
symbol_matches = {} # match symbols
|
144
|
+
alias_matches = {} # match aliases
|
145
|
+
info = {} # the main symbol match tracker
|
146
|
+
hugo = {} # HUGO tracker
|
147
|
+
|
148
|
+
$stderr.print "\nParse symbol files..."
|
149
|
+
parse_symbols = lambda { |symbolfn,is_hugo=false|
|
150
|
+
# ---- for every symbol file
|
151
|
+
$stderr.print "\nParse symbol file #{symbolfn}..."
|
152
|
+
Symbols::each(symbolfn) do | symbol,aliases,descr |
|
153
|
+
# ---- for every symbol and aliases
|
154
|
+
# $stderr.print "\nHUGO-"+symbol if symbol =~ /L3MBTL/
|
155
|
+
hugo[symbol] = true if is_hugo
|
156
|
+
# alias_count += aliases.size if aliases
|
157
|
+
# ---- If the symbol has a match, and it is not in the list, add it
|
158
|
+
if tokens[symbol] and not info[symbol]
|
159
|
+
symbol_matches[symbol] = tokens[symbol]
|
160
|
+
hugo_matches[symbol] ||= true if is_hugo
|
161
|
+
info[symbol] = { :symbol => symbol, :is_hugo=>is_hugo, :aliases => aliases, :descr => descr, :symbolfn => symbolfn }
|
162
|
+
end
|
163
|
+
# ---- If an alias has a match and is not in the list, add it
|
164
|
+
if aliases
|
165
|
+
aliases.each do | word |
|
166
|
+
# $stderr.print "\n!!"+word+':'+symbol if word == "L3MBTL"
|
167
|
+
# full = word + ' (' + symbol + ')'
|
168
|
+
if tokens[word] and not info[word]
|
169
|
+
# $stderr.print "\n=="+word+':'+symbol if word == "L3MBTL"
|
170
|
+
hugo_matches[word] ||= true if hugo[word]
|
171
|
+
alias_matches[word] = tokens[word]
|
172
|
+
info[word] = { :symbol => symbol, :is_hugo=>hugo[word], :aliases => aliases, :descr => descr, :symbolfn => symbolfn }
|
173
|
+
end
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
}
|
178
|
+
|
179
|
+
parse_symbols.call(options[:hugo],is_hugo = true) if options[:hugo]
|
180
|
+
options[:symbols].each { |fn| parse_symbols.call(fn) }
|
181
|
+
|
182
|
+
if options[:rdf]
|
183
|
+
# Write RDF!
|
184
|
+
print <<HEADER
|
185
|
+
|
186
|
+
# RDF output by bio-exominer https://github.com/pjotrp/bioruby-exominer
|
187
|
+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
|
188
|
+
@prefix dc: <http://purl.org/dc/elements/1.1/> .
|
189
|
+
@prefix hgnc: <http://identifiers.org/hgnc.symbol/> .
|
190
|
+
@prefix doi: <http://dx.doi.org/> .
|
191
|
+
@prefix bibo: <http://purl.org/ontology/bibo/> .
|
192
|
+
@prefix ncbigene: <https://www.google.nl/search?q=ncbi+gene+alias+> .
|
193
|
+
@prefix : <http://biobeat.org/rdf/exominer/ns#> .
|
194
|
+
|
195
|
+
HEADER
|
196
|
+
|
197
|
+
# Fix DOI with http://www.doi.org/doi_handbook/2_Numbering.html#2.6.2
|
198
|
+
print ":#{name} :doi \"doi:#{doi}\" . \n" if doi
|
199
|
+
print ":#{name} rdf:label \"#{name}\" . \n"
|
200
|
+
print ":#{name} a \"text resource\" . \n"
|
201
|
+
print ":#{name} dc:type \"#{options[:type]}\" . \n" if options[:type]
|
202
|
+
if options[:tags]
|
203
|
+
options[:tags].each do | k,v |
|
204
|
+
print ":#{name} :#{k.to_sym} \"#{v}\" .\n"
|
205
|
+
end
|
206
|
+
end
|
207
|
+
print "\n"
|
208
|
+
|
209
|
+
write_symbol_rdf = lambda { |symbol,freq,is_alias=false|
|
210
|
+
match_info = info[symbol]
|
211
|
+
match_context = context[symbol].join("; ")
|
212
|
+
symbol1 = match_info[:symbol]
|
213
|
+
symboluri = Symbols::uri(symbol1,hugo)
|
214
|
+
symbolidentifier = RDF::make_identifier(symbol1)
|
215
|
+
symbol1 = symbolidentifier if not hugo[symbol1]
|
216
|
+
symbolref = name + '_' + symbol1
|
217
|
+
print ":#{symbolref} a #{symboluri} .\n"
|
218
|
+
# print ":#{symbolref} :alias #{Symbols::uri(symbol,hugo)} .\n" if symbol1 != symbol
|
219
|
+
print ":#{symbolref} :textmatch \"#{symbol}\" .\n"
|
220
|
+
print ":#{symbolref} dc:partOf :#{name} .\n"
|
221
|
+
print ":#{symbolref} :frequency #{freq} .\n"
|
222
|
+
print ":#{symbolref} :context \"#{match_context.encode("UTF-8").encode(:xml => :text)}\" .\n" if give_context
|
223
|
+
print "#{symboluri} rdf:label \"#{symboluri}\" .\n"
|
224
|
+
print "#{symboluri} rdf:comment \"#{info[symbol][:descr]}\" .\n"
|
225
|
+
print "\n"
|
226
|
+
}
|
227
|
+
|
228
|
+
alias_matches.each do | symbol, freq |
|
229
|
+
write_symbol_rdf.call(symbol,freq,is_alias=true) if not symbol_matches[symbol]
|
230
|
+
end
|
231
|
+
symbol_matches.each do | symbol, freq |
|
232
|
+
write_symbol_rdf.call(symbol,freq,is_alias=false)
|
233
|
+
end
|
234
|
+
else
|
235
|
+
print "\nMatching symbol aliases:\n"
|
236
|
+
alias_matches.sort_by{|k,v| v}.each do | k,v |
|
237
|
+
print v,"\t",(hugo[info[k][:symbol]]?" HUGO":""),"\t",k,"\t",info[k][:symbol],"\t",info[k][:descr],"\n" if not symbol_matches[k]
|
238
|
+
end
|
239
|
+
print "\nMatching symbols:\n"
|
240
|
+
symbol_matches.sort_by{|k,v| v}.each do | k,v |
|
241
|
+
print v,"\t",(hugo[k]?" HUGO":""),"\t",k,"\t",info[k][:descr],"\n"
|
242
|
+
end
|
243
|
+
end
|
244
|
+
# $stderr.print "\nA total of #{symbol_count} symbols and #{alias_count} aliases scanned."
|
245
|
+
$stderr.print "\nThere were #{hugo_matches.size} HUGO matches out of #{hugo.size} symbols."
|
246
|
+
$stderr.print "\nDone!\n"
|
247
|
+
rescue OptionParser::InvalidOption => e
|
248
|
+
# Report the bad option and fail, instead of silently exiting with success
options[:invalid_argument] = e.message
$stderr.puts e.message
$stderr.puts opts
exit 1
|
249
|
+
end
|
250
|
+
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Convert HUGO (HGNC) gene table to symbol file and calculate used letter
|
4
|
+
# frequencies. Note: all symbols that are numbers are removed.
|
5
|
+
#
|
6
|
+
# Copyright (C) 2013 Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'yaml'
|
10
|
+
|
11
|
+
# Decide whether a gene symbol should be kept.
#
# s - the symbol String, or nil when the input line has no such column
#
# Returns false for nil and for symbols made of digits only, true otherwise.
# A symbol that contains whitespace is still accepted, but a warning is
# written to $stderr.
def valid_symbol(s)
  return false if s.nil?          # short or blank input line: nothing to validate
  s = s.strip
  return false if s =~ /\A\d+\z/  # drop all digit id's (anchored to the whole string)
  $stderr.print "Symbol contains a space! <#{s}>\n" if s =~ /\s/
  true
end
|
18
|
+
|
19
|
+
# Helpers for reporting the character frequencies collected by this script.
module Freq
  # Render a frequency table as text.
  #
  # freq - Hash whose keys are the counted characters and whose values are counts
  #
  # Returns a String: the YAML dump of freq, an empty line, and then all keys
  # concatenated in sorted order.
  def Freq::to_s(freq)
    # join replaces the former `buf += c` loop, which reallocated the string per key
    freq.to_yaml + "\n" + freq.keys.sort.join
  end
end
|
28
|
+
|
29
|
+
freq = {}
|
30
|
+
|
31
|
+
counter = 0
|
32
|
+
|
33
|
+
ARGF.each_line do | line |
|
34
|
+
counter += 1
|
35
|
+
$stderr.print "." if counter % 10_000 == 0
|
36
|
+
|
37
|
+
next if line =~ /^HGNC ID/
|
38
|
+
a = line.strip.split(/\t/)
|
39
|
+
symbol = a[1]
|
40
|
+
next if not valid_symbol(symbol)
|
41
|
+
name = a[2]
|
42
|
+
oldnames = nil
|
43
|
+
oldnames = a[4].strip.split(/\s?,\s?/) if a.size > 4
|
44
|
+
aliases = nil
|
45
|
+
aliases = a[6].strip.split(/\s?,\s?/) if a.size > 6
|
46
|
+
# p [a[4],a[6]]
|
47
|
+
as = []
|
48
|
+
as = aliases if aliases
|
49
|
+
as += oldnames if oldnames
|
50
|
+
as.reject! { |c| c.empty? }
|
51
|
+
aliases = if as.size == 0
|
52
|
+
'NA'
|
53
|
+
else
|
54
|
+
as.uniq.join('|')
|
55
|
+
end
|
56
|
+
descr = a[2]
|
57
|
+
descr = '' if descr == '-'
|
58
|
+
print symbol,"\t",aliases,"\t",descr,"\n"
|
59
|
+
# Add stats
|
60
|
+
cs = symbol.scan(/./)
|
61
|
+
if aliases != 'NA'
|
62
|
+
cs += aliases.scan(/./) - ['|']
|
63
|
+
end
|
64
|
+
cs.each do |c|
|
65
|
+
freq[c] = 0 if not freq[c]
|
66
|
+
freq[c] += 1
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
File.open('hugo_exominer_symbols.freq','w') do |f|
|
71
|
+
f.print(Freq::to_s(freq))
|
72
|
+
end
|
73
|
+
|
74
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Convert NCBI gene_info to symbol file and calculate used letter
|
4
|
+
# frequencies. Note: all symbols that are numbers are removed.
|
5
|
+
#
|
6
|
+
# Copyright (C) 2013 Pjotr Prins <pjotr.prins@thebird.nl>
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'yaml'
|
10
|
+
|
11
|
+
# Decide whether a gene symbol (or alias) should be kept.
#
# s - the symbol String, or nil when the input line has no such column
#
# Returns false for nil and for symbols made of digits only, true otherwise.
# A symbol that contains whitespace is still accepted, but a warning is
# written to $stderr.
def valid_symbol(s)
  return false if s.nil?          # short or blank input line: nothing to validate
  s = s.strip
  return false if s =~ /\A\d+\z/  # drop all digit id's (anchored to the whole string)
  $stderr.print "Symbol contains a space! <#{s}>\n" if s =~ /\s/
  true
end
|
18
|
+
|
19
|
+
# Helpers for reporting the character frequencies collected by this script.
module Freq
  # Render a frequency table as text.
  #
  # freq - Hash whose keys are the counted characters and whose values are counts
  #
  # Returns a String: the YAML dump of freq, an empty line, and then all keys
  # concatenated in sorted order.
  def Freq::to_s(freq)
    # join replaces the former `buf += c` loop, which reallocated the string per key
    freq.to_yaml + "\n" + freq.keys.sort.join
  end
end
|
28
|
+
|
29
|
+
freq = {}
|
30
|
+
|
31
|
+
counter = 0
|
32
|
+
|
33
|
+
ARGF.each_line do | line |
|
34
|
+
counter += 1
|
35
|
+
$stderr.print "." if counter % 100_000 == 0
|
36
|
+
|
37
|
+
next if line =~ /^#/
|
38
|
+
a = line.strip.split(/\t/)
|
39
|
+
symbol = a[2]
|
40
|
+
next if symbol == 'NEWENTRY' or symbol == '-'
|
41
|
+
# Skip gene names that are numbers only
|
42
|
+
next if not valid_symbol(symbol)
|
43
|
+
name = a[3]
|
44
|
+
aliases = a[4]
|
45
|
+
if name != '-' and name != symbol and valid_symbol(name)
|
46
|
+
if aliases == '-'
|
47
|
+
aliases = name
|
48
|
+
else
|
49
|
+
aliases += '|'+name
|
50
|
+
end
|
51
|
+
end
|
52
|
+
aliases =
|
53
|
+
if aliases == '-'
|
54
|
+
'NA'
|
55
|
+
else
|
56
|
+
as1 = aliases.split(/\|/)
|
57
|
+
# Skip gene names that are numbers only
|
58
|
+
as2 = as1.delete_if { |a| not valid_symbol(a) }
|
59
|
+
as2.uniq.join('|')
|
60
|
+
end
|
61
|
+
descr = a[8]
|
62
|
+
descr = '' if descr == '-'
|
63
|
+
print symbol,"\t",aliases,"\t",descr,"\n"
|
64
|
+
# Add stats
|
65
|
+
cs = symbol.scan(/./)
|
66
|
+
if aliases != 'NA'
|
67
|
+
cs += aliases.scan(/./) - ['|']
|
68
|
+
end
|
69
|
+
cs.each do |c|
|
70
|
+
freq[c] = 0 if not freq[c]
|
71
|
+
freq[c] += 1
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
File.open('ncbi_exominer_symbols.freq','w') do |f|
|
76
|
+
f.print(Freq::to_s(freq))
|
77
|
+
end
|
78
|
+
|
79
|
+
|