obo_parser 0.3.4 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,6 +1,6 @@
1
1
  = obo_parser
2
2
 
3
- A simple Ruby gem for parsing OBO 1.2 formatted ontology files. Useful for reporting, comparing, and mapping data to other databases. There is presently no functionality for logical inference across the ontology.
3
+ A simple Ruby gem for parsing OBO 1.2 (?4) formatted ontology files. Useful for reporting, comparing, and mapping data to other databases. There is presently no functionality for logical inference across the ontology.
4
4
 
5
5
  == Installation
6
6
 
@@ -8,6 +8,8 @@ A simple Ruby gem for parsing OBO 1.2 formatted ontology files. Useful for repo
8
8
 
9
9
  == Use
10
10
 
11
+ === General
12
+
11
13
  require 'rubygems'
12
14
  require 'obo_parser'
13
15
  foo = parse_obo_file(File.read('my_ontology.obo')) # => An OboParser instance
@@ -36,13 +38,20 @@ A simple Ruby gem for parsing OBO 1.2 formatted ontology files. Useful for repo
36
38
 
37
39
  foo.terms.first.relationships # => [['relation_ship', 'FOO:123'], ['other_relationship', 'FOO:456'] ...] An array of [relation, related term id], includes 'is_a', 'disjoint_from' and Typedefs
38
40
 
41
+ === Convenience methods
42
+
43
+ foo.term_hash # => { term (String) => id (String), ... for each [Term] in the file. } !! Assumes term names are unique; they might not be, in which case you get key collisions.
44
+ foo.id_hash # => { id (String) => term (String), ... for each [Term] in the file. }
45
+
39
46
  See also /test/test_obo_parser.rb
40
47
 
41
48
  == Utilities
42
49
 
43
- !! UTILTIES ARE PRESENTLY BORKED !!
50
+ A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
51
+
52
+ == Documentation
44
53
 
45
- A small set of methods (e.g. comparing OBO ontologies) utilizing the gem are included in utilities.rb. See /lib/utilities.rb. For example, shared labels across sets of ontologies can be found and returned.
54
+ Code documentation is slowly being formalized using Yard.
46
55
 
47
56
  == Copyright
48
57
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.4
1
+ 0.3.5
data/lib/tokens.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module OboParser::Tokens
2
2
 
3
3
  class Token
4
- # this allows access the the class attribute regexp, without using a class variable
4
+ # this allows access to the class attribute regexp, without using a class variable
5
5
  class << self; attr_reader :regexp; end
6
6
  attr_reader :value
7
7
  def initialize(str)
@@ -17,7 +17,7 @@ module OboParser::Tokens
17
17
  @regexp = Regexp.new(/\A\s*(\[typedef\])\s*/i)
18
18
  end
19
19
 
20
- # Token eeds simplification, likely through creating additional tokens for quoted qualifiers, optional modifiers ({}), and the creation of individual
20
+ # Token needs simplification, likely through creating additional tokens for quoted qualifiers, optional modifiers ({}), and the creation of individual
21
21
  # tokens for individual tags that don't conform to the pattern used for def: tags.
22
22
  # The code can't presently handle escaped characters (like \,), as bizarrely found in some OBO files.
23
23
  class TagValuePair < Token
@@ -66,6 +66,7 @@ module OboParser::Tokens
66
66
  qq = 0 # some failsafes
67
67
  while xref_list.length > 0
68
68
  qq += 1
69
+ debugger if qq == 499
69
70
  raise "#{xref_list}" if qq > 500
70
71
  xref_list.gsub!(/\A\s*,\s*/, '')
71
72
 
data/lib/utilities.rb CHANGED
@@ -4,79 +4,61 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'obo_parser'))
4
4
 
5
5
  module OboParser::Utilities
6
6
 
7
- # Example usage
8
- # of1 = File.read('hao1.obo')
9
- # of2 = File.read('hao2.obo')
10
- # of3 = File.read('hao3.obo')
11
- # of4 = File.read('hao4.obo')
7
+ # Summarizes labels used by id in a two column tab delimited format
8
+ # Providing a cutoff will report only those ids/labels with > 1 label per id
9
+ # Does not (yet) include reference to synonyms, this could be easily extended.
12
10
  #
13
- # OboParser::Utilities::dump_comparison_by_id([of1, of2, of3, of4])
14
- def self.dump_comparison_by_id(files = []) # :yields: String
11
+ #== Example use
12
+ # of1 = File.read('foo1.obo')
13
+ # of2 = File.read('foo2.obo')
14
+ # of3 = File.read('foo3.obo')
15
+ # of4 = File.read('foo4.obo')
16
+ #
17
+ # OboParser::Utilities.dump_comparison_by_id(0,[of1, of2, of3, of4])
18
+ #
19
+ # @param [Integer] cutoff only Term ids with > cutoff labels will be reported
20
+ # @param [Array] files an Array of read files
21
+ # @return [String] the translation in tab-delimited format
22
+ def self.dump_comparison_by_id(cutoff = 0, files = [])
23
+ return '' if files.size < 1
24
+
15
25
  of = []
16
26
  files.each_with_index do |f, i|
17
27
  of[i] = parse_obo_file(f)
18
28
  end
19
-
29
+
20
30
  all_data = {}
21
31
 
22
32
  of.each do |f|
23
33
  tmp_hash = f.id_hash
24
34
  tmp_hash.keys.each do |id|
25
35
  if all_data[id]
26
- all_data[id].push tmp_hash[id]
36
+ all_data[id].push(tmp_hash[id])
27
37
  else
28
38
  all_data[id] = [tmp_hash[id]]
29
39
  end
30
40
  end
31
41
  end
32
42
 
33
- puts "\nA list of all labels used across all submitted files for a given ID\n\n"
34
43
  all_data.keys.sort.each do |k|
35
- if all_data[k].uniq.size > 1
36
- puts "#{k}\t: #{all_data[k].uniq.join(', ')}"
44
+ if all_data[k].uniq.size > cutoff
45
+ puts "#{k}\t#{all_data[k].uniq.join(', ')}"
37
46
  end
38
47
  end
39
48
  end
40
49
 
41
- # infile is a tab delimited 2 column file that contains IDs in the from FOO_1234
42
- # The file is replicated to STDOUT replacing the ID with the Term
43
- def self.alignment_translate(infile = nil) # :yields: String
44
- agreement = ARGV[0]
45
- raise "Provide a file with comparison." if agreement.nil?
46
- comparison = File.read(agreement)
47
-
48
- obo_files = Dir.entries('.').inject([]){|sum, a| sum.push( a =~ /\.obo\Z/ ? a : nil)}.compact!
49
- identifiers = {}
50
-
51
- obo_files.each do |f|
52
- puts "Reading: #{f}"
53
- identifiers.merge!( parse_obo_file(File.read(f)).id_hash )
54
- end
55
-
56
- comparison.each do |l|
57
- v1, v2 = l.split("\t")
58
- # puts "#{v1} - #{v2}"
59
-
60
- next if v1.nil? || v2.nil?
61
-
62
- v1.gsub!(/_/, ":")
63
- v1.strip!
64
- v2.gsub!(/_/, ":")
65
- v2.strip!
66
-
67
- puts (identifiers[v1].nil? ? 'NOT FOUND' : identifiers[v1]) +
68
- "\t" +
69
- (identifiers[v2].nil? ? 'NOT FOUND' : identifiers[v2])
70
- end
71
- end
72
-
73
- # Returns labels found in all passed ontologies
74
- # Usage:
50
+ # Returns all labels found in all passed ontologies. Does not yet include synonyms.
51
+ #
52
+ #== Example use
75
53
  # of1 = File.read('fly_anatomy.obo')
76
54
  # of2 = File.read('hao.obo')
77
55
  # of3 = File.read('mosquito_anatomy.obo')
78
- # shared_labels([of1, of6])
79
- def self.shared_labels(files = []) # :yields: String
56
+ #
57
+ # OboParser::Utilities.shared_labels([of1, of3])
58
+ #
59
+ # @param [Array] files an Array of read files
60
+ # @return [String] labels, one per line
61
+ def self.shared_labels(files = [])
80
62
  comparison = {}
81
63
 
82
64
  files.each do |f|
@@ -91,17 +73,175 @@ module OboParser::Utilities
91
73
  end
92
74
  end
93
75
  end
94
-
95
- match = []
76
+
77
+ match = []
96
78
  comparison.keys.each do |k|
97
79
  if comparison[k] == files.size
98
80
  match.push k
99
81
  end
100
82
  end
101
83
 
102
- puts match.sort.join("\n")
103
- puts "\n#{match.length} total."
84
+ puts match.sort.join("\n")
85
+ puts "\n#{match.length} total."
104
86
 
105
87
  end
106
-
88
+
89
+
90
+ #== Two column translation tools
91
+
92
+ HOMOLONTO_HEADER = %{
93
+ format-version: 1.2
94
+ auto-generated-by: obo_parser
95
+ default-namespace: fix_me
96
+
97
+ [Typedef]
98
+ id: OGEE:has_member
99
+ name: has_member
100
+ is_a: OBO_REL:relationship
101
+ def: "C has_member C', C is an homology group and C' is a biological object" []
102
+ comment: "We leave open the possibility that an homology group is a biological object. Thus, an homology group C may have C' has_member, with C' being an homology group."
103
+ is_transitive: true
104
+ is_anti_symmetric: true
105
+
106
+ }
107
+
108
+
109
+ # Takes a two column input file, references it to two ontologies, and provides a report.
110
+ #
111
+ #== Example use
112
+ # file = File.read('HAO_TGMA_list.txt')
113
+ # col1_obo = File.read('hao.obo')
114
+ # col2_obo = File.read('tgma.obo')
115
+ # column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
116
+ #
117
+ # OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
118
+ #== Output types
119
+ # There are several output report types
120
+ # :xls - Translates the columns in the data_file to the option passed in :translate_to, the first matching against col1_obo, the second against col2_obo. Returns an Excel file.
121
+ # :homolonto - Generates a homolonto compatible file to STDOUT
122
+ # :cols - Prints a two column format to STDOUT
123
+ #
124
+ # @param [Hash] options options.
125
+ # @param [Symbol] data the two column data file.
126
+ # @return [String] the translation in tab-delimited format.
127
+ def self.column_translate(options = {})
128
+ opt = {
129
+ :data => nil,
130
+ :col1_obo => nil,
131
+ :col2_obo => nil,
132
+ :translate_to => :id, # also :label
133
+ :output => :cols, # also :xls, :homolonto
134
+ :output_filename => 'foo',
135
+ :index_start => 0
136
+ }.merge!(options)
137
+
138
+ c1obo = parse_obo_file(opt[:col1_obo])
139
+ c2obo = parse_obo_file(opt[:col2_obo])
140
+
141
+ case opt[:output]
142
+ when :xls
143
+ Spreadsheet.client_encoding = 'UTF-8'
144
+ book = Spreadsheet::Workbook.new
145
+ sheet = book.create_worksheet
146
+ when :homolonto
147
+ s = HOMOLONTO_HEADER
148
+ opt[:translate_to] = :id # force this in this mode
149
+ end
150
+
151
+ i = opt[:index_start]
152
+ v1 = nil # a label like 'head'
153
+ v2 = nil
154
+ c1 = nil # an id 'FOO:123'
155
+ c2 = nil
156
+
157
+ opt[:data].split(/\n/).each do |row|
158
+ i += 1
159
+ c1, c2 = row.split(/\t/).map(&:strip)
160
+
161
+ if c1.nil? || c2.nil?
162
+ puts
163
+ next
164
+ end
165
+
166
+ # the conversion
167
+ if opt[:translate_to] == :id
168
+ if c1 =~ /.*\:.*/ # it's an id, leave it
169
+ v1 = c1
170
+ else
171
+ v1 = c1obo.term_hash[c1]
172
+ end
173
+ if c2 =~ /.*\:.*/
174
+ v2 = c2
175
+ else
176
+ v2 = c2obo.term_hash[c2]
177
+ end
178
+ else
179
+ if c1 =~ /.*\:.*/
180
+ v1 = c1obo.id_hash[c1]
181
+ else
182
+ v1 = c1
183
+ end
184
+ if c2 =~ /.*\:.*/
185
+ v2 = c2obo.id_hash[c2]
186
+ else
187
+ v2 = c2
188
+ end
189
+ end
190
+
191
+ case opt[:output]
192
+ when :cols
193
+ puts "#{v1}\t#{v2}"
194
+ when :xls
195
+ sheet[i,0] = v1
196
+ sheet[i,1] = OboParser::Utilities.term_stanza_from_file(v1, opt[:col1_obo])
197
+ sheet[i,2] = v2
198
+ sheet[i,3] = OboParser::Utilities.term_stanza_from_file(v2, opt[:col2_obo])
199
+ when :homolonto
200
+ s << OboParser::Utilities.homolonto_stanza(i, c1obo.id_hash[v1] , v1, v2) # "#{c1obo.id_hash[v1]} ! #{c2obo.id_hash[v2]}"
201
+ s << "\n\n"
202
+ end
203
+ end
204
+
205
+ case opt[:output]
206
+ when :xls
207
+ book.write "#{opt[:output_filename]}.xls"
208
+ when :homolonto
209
+ puts s + "\n"
210
+ end
211
+
212
+ true
213
+ end
214
+
215
+ # Returns a HomolOnto Stanza
216
+ #
217
+ # @param [String] id an externally tracked id for the id: tag like '00001'
218
+ # @param [String] name a name for the name: tag
219
+ # @param [Array] members an Array of 2 or more members for the relationship: has_member tag like ['FOO:123', 'BAR:456']
220
+ # @return [String] the stanza requested
221
+ def self.homolonto_stanza(id, name, *members)
222
+ return 'NOT ENOUGH RELATIONSHIPS' if members.length < 2
223
+ s = []
224
+ s << '[Term]'
225
+ s << "id: HOG:#{id}"
226
+ s << "name: #{name}"
227
+ members.each do |m|
228
+ s << "relationship: has_member #{m}"
229
+ end
230
+ s.join("\n")
231
+ end
232
+
233
+ #== Helper methods that don't require the obo_parser library
234
+
235
+ # Given a Term id and a String representing an OBO file returns that stanza.
236
+ #
237
+ # @param [String] id a Term id like 'FOO:123'
238
+ # @param [String] file an OBO file as a String like File.read('my.obo')
239
+ # @return [String] the stanza requested
240
+ def self.term_stanza_from_file(id, file)
241
+ foo = ""
242
+ file =~ /(^\[Term\]\s*?id:\s*?#{id}.*?)(^\[Term\]|^\[Typedef\])/im
243
+ foo = $1 if !$1.nil?
244
+ foo.gsub(/\n\r/,"\n")
245
+ end
246
+
107
247
  end
data/obo_parser.gemspec CHANGED
@@ -1,51 +1,49 @@
1
1
  # Generated by jeweler
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{obo_parser}
8
- s.version = "0.3.4"
8
+ s.version = "0.3.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["mjy"]
12
- s.date = %q{2011-04-11}
12
+ s.date = %q{2011-06-09}
13
13
  s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
14
14
  s.email = %q{diapriid@gmail.com}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE",
17
- "README.rdoc"
17
+ "README.rdoc"
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
- ".gitignore",
22
- "LICENSE",
23
- "README.rdoc",
24
- "Rakefile",
25
- "VERSION",
26
- "init.rb",
27
- "install.rb",
28
- "lib/lexer.rb",
29
- "lib/obo_parser.rb",
30
- "lib/parser.rb",
31
- "lib/tokens.rb",
32
- "lib/utilities.rb",
33
- "obo_parser.gemspec",
34
- "tasks/obo_parser_tasks.rake",
35
- "test/cell.obo",
36
- "test/obo_1.0_test.txt",
37
- "test/obo_1.0_test_wo_typedefs.txt",
38
- "test/test_obo_parser.rb",
39
- "uninstall.rb"
21
+ "LICENSE",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "VERSION",
25
+ "init.rb",
26
+ "install.rb",
27
+ "lib/lexer.rb",
28
+ "lib/obo_parser.rb",
29
+ "lib/parser.rb",
30
+ "lib/tokens.rb",
31
+ "lib/utilities.rb",
32
+ "obo_parser.gemspec",
33
+ "tasks/obo_parser_tasks.rake",
34
+ "test/cell.obo",
35
+ "test/go.obo",
36
+ "test/hao.obo",
37
+ "test/obo_1.0_test.txt",
38
+ "test/obo_1.0_test_wo_typedefs.txt",
39
+ "test/test_obo_parser.rb",
40
+ "test/tgma.obo",
41
+ "uninstall.rb"
40
42
  ]
41
43
  s.homepage = %q{http://github.com/mjy/obo_parser}
42
- s.rdoc_options = ["--charset=UTF-8"]
43
44
  s.require_paths = ["lib"]
44
- s.rubygems_version = %q{1.5.3}
45
+ s.rubygems_version = %q{1.7.2}
45
46
  s.summary = %q{A simple OBO file handler.}
46
- s.test_files = [
47
- "test/test_obo_parser.rb"
48
- ]
49
47
 
50
48
  if s.respond_to? :specification_version then
51
49
  s.specification_version = 3
data/test/cell.obo CHANGED
@@ -4365,7 +4365,7 @@ is_a: CL:0000255 ! eukaryotic cell
4365
4365
  [Term]
4366
4366
  id: CL:0000611
4367
4367
  name: eosinophil progenitor cell
4368
- comment: These cells are CD34-positive, CD45RA-negative, CD71-negative, and lineage-negative (CD2, CD3 epsilon, CD4, CD5, CD8a, CD14, CD19, CD20, integrin alpha-M, NCAM-1, SCA1, Ly6G, Ly76).
4368
+ comment: These cells are CD34-positive, CD45RA-negative, CD71-negative, and lineage-negative (CD2, CD3 epsilon, CD4, CD5, CD8a, CD14, CD19, CD20, integrin alpha-M, NCAM-1, SCA¿1, Ly6G, Ly76).
4369
4369
  synonym: "CFU-Eo" RELATED []
4370
4370
  synonym: "colony forming unit eosinophil" RELATED []
4371
4371
  synonym: "EoP" EXACT [PMID:15955840, PMID:19114669]