mesh-medical-subject-headings 1.3.0 → 2.0.1

Sign up for free protection for your applications and access to all features.
data/test/test_helper.rb CHANGED
@@ -9,17 +9,17 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
9
9
 
10
10
  require "MESH"
11
11
 
12
- puts 'Configuring MESH::Mesh — this may take up to 10 seconds.'
13
- start = Time.now
14
- MESH::Mesh.configure(filename: File.expand_path('../../data/mesh_data_2014/d2014.bin.gz', __FILE__))
15
- finish = Time.now
16
- configuration_time = finish - start
12
+ # puts 'Configuring MESH::Mesh — this may take up to 10 seconds.'
13
+ # start = Time.now
14
+ # MESH::Mesh.configure(filename: File.expand_path('../../data/mesh_data_2014/d2014.bin.gz', __FILE__))
15
+ # finish = Time.now
16
+ # configuration_time = finish - start
17
17
  #raise 'MESH::Mesh should configure in less than 10 seconds.' unless configuration_time < 10
18
18
 
19
- puts 'Translating MESH::Mesh into English ;) — this may take up to 60 seconds.'
20
- start = Time.now
21
- MESH::Mesh.translate('en-GB', MESH::Translator.new(MESH::Translator.enus_to_engb))
22
- finish = Time.now
23
- configuration_time = finish - start
24
- puts "took #{configuration_time}"
19
+ # puts 'Translating MESH::Mesh into English ;) — this may take up to 60 seconds.'
20
+ # start = Time.now
21
+ # MESH::Mesh.translate('en-GB', MESH::Translator.new(MESH::Translator.enus_to_engb))
22
+ # finish = Time.now
23
+ # configuration_time = finish - start
24
+ # puts "took #{configuration_time}"
25
25
  #raise 'MESH::Mesh should translate in less than 30 seconds.' unless configuration_time < 60
data/tr_speed.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  require_relative 'lib/MESH'
2
2
 
3
3
  puts DateTime.now
4
- MESH::Mesh.configure(filename: 'data/mesh_data_2014/d2014.bin.gz')
4
+ MESH::Heading.configure(filename: 'data/mesh_data_2014/d2014.bin.gz')
5
5
  puts DateTime.now
6
- MESH::Mesh.translate('en-GB', MESH::Translator.new(MESH::Translator.enus_to_engb))
6
+ MESH::Heading.translate('en-GB', MESH::Translator.new(MESH::Translator.enus_to_engb))
7
7
  puts DateTime.now
8
8
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mesh-medical-subject-headings
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-10 00:00:00.000000000 Z
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -113,17 +113,19 @@ files:
113
113
  - Rakefile
114
114
  - data/mesh_data_2014/c2014.bin.gz
115
115
  - data/mesh_data_2014/d2014.bin.gz
116
+ - data/mesh_data_2014/d2014.en_gb.bin.gz
116
117
  - data/mesh_data_2014/mtrees2014.bin.gz
117
118
  - data/mesh_data_2014/q2014.bin.gz
118
119
  - data/mesh_data_2014/useful_2014.tsv
119
120
  - lib/MESH.rb
120
121
  - lib/MESH/classifier.rb
121
- - lib/MESH/mesh.rb
122
+ - lib/MESH/heading.rb
122
123
  - lib/MESH/translator.rb
124
+ - lib/MESH/tree.rb
123
125
  - lib/MESH/version.rb
124
126
  - match.rb
125
127
  - test/classifier_test.rb
126
- - test/mesh_test.rb
128
+ - test/mesh_core_test.rb
127
129
  - test/test_helper.rb
128
130
  - test/translator_test.rb
129
131
  - tr_speed.rb
@@ -154,7 +156,7 @@ summary: A ruby gem containing MeSH subject headings (https://www.nlm.nih.gov/me
154
156
  for use in classifying and entity recognition.
155
157
  test_files:
156
158
  - test/classifier_test.rb
157
- - test/mesh_test.rb
159
+ - test/mesh_core_test.rb
158
160
  - test/test_helper.rb
159
161
  - test/translator_test.rb
160
162
  has_rdoc:
data/lib/MESH/mesh.rb DELETED
@@ -1,314 +0,0 @@
1
- module MESH
2
- class Mesh
3
-
4
- include Comparable
5
- attr_accessor :unique_id, :tree_numbers, :roots, :parents, :children, :useful, :descriptor_class
6
-
7
- def <=> other
8
- self.unique_id <=> other.unique_id
9
- end
10
-
11
- def original_heading(locale = @@default_locale)
12
- return @original_heading[locale]
13
- end
14
-
15
- def natural_language_name(locale = @@default_locale)
16
- return @natural_language_name[locale]
17
- end
18
-
19
- def summary(locale = @@default_locale)
20
- return @summary[locale]
21
- end
22
-
23
- def entries(locale = @@default_locale)
24
- @entries[locale] ||= []
25
- return @entries[locale]
26
- end
27
-
28
- def self.configure(args)
29
- return if @@configured
30
- raise ArgumentError.new('MeshHeadingGraph requires a filename in order to configure itself') unless not args[:filename].nil?
31
-
32
- gzipped_file = File.open(args[:filename])
33
- file = Zlib::GzipReader.new(gzipped_file)
34
-
35
- current_heading = Mesh.new
36
- file.each_line do |line|
37
-
38
- case
39
-
40
- when matches = line.match(/^\*NEWRECORD$/)
41
- unless current_heading.unique_id.nil?
42
- current_heading.entries.sort!
43
- @@headings << current_heading
44
- @@by_unique_id[current_heading.unique_id] = current_heading
45
- @@by_original_heading[current_heading.original_heading] = current_heading
46
- current_heading.tree_numbers.each do |tree_number|
47
- @@by_tree_number[tree_number] = current_heading
48
- end
49
- end
50
- current_heading = Mesh.new
51
-
52
- when matches = line.match(/^UI = (.*)/)
53
- current_heading.unique_id = matches[1]
54
-
55
- when matches = line.match(/^MN = (.*)/)
56
- current_heading.tree_numbers << matches[1]
57
- current_heading.roots << matches[1][0] unless current_heading.roots.include?(matches[1][0])
58
-
59
- when matches = line.match(/^MS = (.*)/)
60
- current_heading.set_summary(matches[1])
61
-
62
- when matches = line.match(/^DC = (.*)/)
63
- current_heading.descriptor_class = @@descriptor_classes[matches[1].to_i]
64
-
65
- when matches = line.match(/^MH = (.*)/)
66
- mh = matches[1]
67
- current_heading.set_original_heading(mh)
68
- current_heading.entries << mh
69
- librarian_parts = mh.match(/(.*), (.*)/)
70
- nln = librarian_parts.nil? ? mh : "#{librarian_parts[2]} #{librarian_parts[1]}"
71
- current_heading.set_natural_language_name(nln)
72
-
73
- when matches = line.match(/^(?:PRINT )?ENTRY = ([^|]+)/)
74
- entry = matches[1].chomp
75
- current_heading.entries << entry
76
-
77
- end
78
-
79
- end
80
-
81
- @@by_unique_id.each do |id, heading|
82
- heading.tree_numbers.each do |tree_number|
83
- #D03.438.221.173
84
- parts = tree_number.split('.')
85
- if parts.size > 1
86
- parts.pop
87
- parent_tree_number = parts.join '.'
88
- parent = @@by_tree_number[parent_tree_number]
89
- heading.parents << parent unless parent.nil?
90
- parent.children << heading unless parent.nil?
91
- end
92
- end
93
- end
94
- @@configured = true
95
- end
96
-
97
- def self.translate(locale, tr)
98
- return if @@locales.include? locale
99
- @@headings.each_with_index do |h, i|
100
- h.set_original_heading(tr.translate(h.original_heading), locale)
101
- h.set_natural_language_name(tr.translate(h.natural_language_name), locale)
102
- h.set_summary(tr.translate(h.summary), locale)
103
- h.entries.each { |entry| h.entries(locale) << tr.translate(entry) }
104
- h.entries(locale).sort!
105
- end
106
-
107
- @@locales << locale
108
- end
109
-
110
- def self.find(unique_id)
111
- raise 'MeshHeadingGraph.configure must be called before use' unless @@configured
112
- return @@by_unique_id[unique_id]
113
- end
114
-
115
- def self.find_by_tree_number(tree_number)
116
- raise 'MeshHeadingGraph.configure must be called before use' unless @@configured
117
- return @@by_tree_number[tree_number]
118
- end
119
-
120
- def self.find_by_original_heading(heading)
121
- raise 'MeshHeadingGraph.configure must be called before use' unless @@configured
122
- return @@by_original_heading[heading]
123
- end
124
-
125
- def self.where(conditions)
126
- matches = []
127
- @@headings.each do |heading|
128
- matches << heading if heading.matches(conditions)
129
- end
130
- matches
131
- end
132
-
133
- def self.each
134
- for i in 0 ... @@headings.size
135
- yield @@headings[i] if @@headings[i].useful
136
- end
137
- end
138
-
139
- def self.match_in_text(text)
140
- return [] if text.nil?
141
- downcased = text.downcase
142
- matches = []
143
- @@headings.each do |heading|
144
- next unless heading.useful
145
- @@locales.each do |locale|
146
- heading.entries(locale).each do |entry|
147
- if downcased.include? entry.downcase #This is a looser check than the regex but much, much faster
148
- if /^[A-Z0-9]+$/ =~ entry
149
- regex = /(^|\W)#{Regexp.quote(entry)}(\W|$)/
150
- else
151
- regex = /(^|\W)#{Regexp.quote(entry)}(\W|$)/i
152
- end
153
- text.to_enum(:scan, regex).map do |m,|
154
- matches << {heading: heading, matched: entry, index: $`.size}
155
- end
156
- end
157
- end
158
- end
159
- end
160
- confirmed_matches = []
161
- matches.combination(2) do |l, r|
162
- if (r[:index] >= l[:index]) && (r[:index] + r[:matched].length <= l[:index] + l[:matched].length)
163
- #r is within l
164
- r[:delete] = true
165
- elsif (l[:index] >= r[:index]) && (l[:index] + l[:matched].length <= r[:index] + r[:matched].length)
166
- #l is within r
167
- l[:delete] = true
168
- end
169
- end
170
- matches.delete_if { |match| match[:delete] }
171
- end
172
-
173
- def has_ancestor(heading)
174
- return false if parents.empty?
175
- return true if parents.include? heading
176
- in_grandparents = parents.map { |p| p.has_ancestor(heading) }
177
- return in_grandparents.include? true
178
- end
179
-
180
- def has_descendant(heading)
181
- return false if children.empty?
182
- return true if children.include? heading
183
- in_grandchildren = children.map { |p| p.has_descendant(heading) }
184
- return in_grandchildren.include? true
185
- end
186
-
187
- def sibling?(heading)
188
- common_parents = parents & heading.parents
189
- !common_parents.empty?
190
- end
191
-
192
- def deepest_position(root = '')
193
- return nil if tree_numbers.empty?
194
- deepest_tree_number = tree_numbers.max_by { |tn| tn.start_with?(root) ? tn.length : 0 }
195
- deepest_tree_number.split('.').length
196
- end
197
-
198
- def shallowest_position
199
- return nil if tree_numbers.empty?
200
- shallowest_tree_number = tree_numbers.min_by { |tn| tn.length }
201
- shallowest_tree_number.split('.').length
202
- end
203
-
204
- def self.cluster(headings)
205
- return headings
206
- end
207
-
208
- def matches(conditions)
209
- conditions.each do |field, pattern|
210
- field_content = self.send(field)
211
- if field_content.kind_of?(Array)
212
- return false unless field_content.find { |fc| pattern =~ fc }
213
- elsif field_content.is_a?(TrueClass) || field_content.is_a?(FalseClass)
214
- return false unless field_content == pattern
215
- elsif field_content.is_a? Symbol
216
- return field_content == pattern
217
- else
218
- return false unless pattern =~ field_content
219
- end
220
- end
221
- return true
222
- end
223
-
224
- def inspect
225
- return "#{unique_id}, #{original_heading}, [#{tree_numbers.join(',')}]"
226
- end
227
-
228
- def set_original_heading(heading, locale = @@default_locale)
229
- @original_heading[locale] = heading
230
- end
231
-
232
- def set_natural_language_name(name, locale = @@default_locale)
233
- @natural_language_name[locale] = name
234
- end
235
-
236
- def set_summary(summary, locale = @@default_locale)
237
- @summary[locale] = summary
238
- end
239
-
240
- private
241
-
242
- @@configured = false
243
- @@headings = []
244
- @@by_unique_id = {}
245
- @@by_tree_number = {}
246
- @@by_original_heading = {}
247
- @@default_locale = 'en-US'
248
- @@locales = [@@default_locale]
249
- @@us_to_gb = Translator.new(Translator.enus_to_engb)
250
- @@descriptor_classes = [:make_array_start_at_1, :topical_descriptor, :publication_type, :check_tag, :geographic_descriptor]
251
-
252
- def initialize
253
- @useful = true
254
- @tree_numbers = []
255
- @roots = []
256
- @parents = []
257
- @children = []
258
- @entries = {}
259
- @entries[@@default_locale] = []
260
- @original_heading = {}
261
- @natural_language_name = {}
262
- @summary = {}
263
- end
264
-
265
-
266
- end
267
- end
268
-
269
- #
270
- #*NEWRECORD
271
- #RECTYPE = D
272
- #MH = Calcimycin
273
- #AQ = AA AD AE AG AI AN BI BL CF CH CL CS CT DU EC HI IM IP ME PD PK PO RE SD ST TO TU UR
274
- #ENTRY = A-23187|T109|T195|LAB|NRW|NLM (1991)|900308|abbcdef
275
- #ENTRY = A23187|T109|T195|LAB|NRW|UNK (19XX)|741111|abbcdef
276
- #ENTRY = Antibiotic A23187|T109|T195|NON|NRW|NLM (1991)|900308|abbcdef
277
- #ENTRY = A 23187
278
- #ENTRY = A23187, Antibiotic
279
- #MN = D03.438.221.173
280
- #PA = Anti-Bacterial Agents
281
- #PA = Calcium Ionophores
282
- #MH_TH = FDA SRS (2014)
283
- #MH_TH = NLM (1975)
284
- #ST = T109
285
- #ST = T195
286
- #N1 = 4-Benzoxazolecarboxylic acid, 5-(methylamino)-2-((3,9,11-trimethyl-8-(1-methyl-2-oxo-2-(1H-pyrrol-2-yl)ethyl)-1,7-dioxaspiro(5.5)undec-2-yl)methyl)-, (6S-(6alpha(2S*,3S*),8beta(R*),9beta,11alpha))-
287
- # RN = 37H9VM9WZL
288
- #RR = 52665-69-7 (Calcimycin)
289
- #PI = Antibiotics (1973-1974)
290
- #PI = Carboxylic Acids (1973-1974)
291
- #MS = An ionophorous, polyether antibiotic from Streptomyces chartreusensis. It binds and transports CALCIUM and other divalent cations across membranes and uncouples oxidative phosphorylation while inhibiting ATPase of rat liver mitochondria. The substance is used mostly as a biochemical tool to study the role of divalent cations in various biological systems.
292
- # OL = use CALCIMYCIN to search A 23187 1975-90
293
- #PM = 91; was A 23187 1975-90 (see under ANTIBIOTICS 1975-83)
294
- #HN = 91(75); was A 23187 1975-90 (see under ANTIBIOTICS 1975-83)
295
- #MED = *62
296
- #MED = 847
297
- #M90 = *299
298
- #M90 = 2405
299
- #M85 = *454
300
- #M85 = 2878
301
- #M80 = *316
302
- #M80 = 1601
303
- #M75 = *300
304
- #M75 = 823
305
- #M66 = *1
306
- #M66 = 3
307
- #M94 = *153
308
- #M94 = 1606
309
- #MR = 20130708
310
- #DA = 19741119
311
- #DC = 1
312
- #DX = 19840101
313
- #UI = D000001
314
- #