mesh-medical-subject-headings 2.0.6 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 65d7558eb1ce2fd0321a053021badccd5a8c3843
4
- data.tar.gz: 3c8cb444d84df8d11c7e7a625436f56b743b7a24
3
+ metadata.gz: a2aca8f7e1eceac511966cc2a886258c2bc81c19
4
+ data.tar.gz: 1c3f83c62564d794239a05084b099dd0160da13b
5
5
  SHA512:
6
- metadata.gz: 903e6e353b7fa48427e8b5e9f63e43bb8e8882fac4d0830088cea7decf0ca0fe967acb36f8ed55f0087268a480ac85e48ec5369434975a36ae29cdedfadfee7b
7
- data.tar.gz: d79445bd35b3530d77ee4a578848dd18da766f2bd23f6797709b2b7fa5776afbdb6f65929c4b6266afaa90d368b99ecc7cbfbd206d6c9fbc4cabfd52127d07f8
6
+ metadata.gz: b06e343f36965d05b05a4689abfe316430f0b4a069432e51609dbbf15b0426edcbad038f129410b6b20d561617472cec601aa1e14c7589683149ec9a6b08183d
7
+ data.tar.gz: 7b8dd6e676a376ce773100cec77e7e1ab923492016d375be9af071ac08895bfa5a9e9c4f94e31376ef4a5f2205308638fec9228c940758c85829f58027238b0d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ #2.1.0 / 2014-07-04
2
+ * [FEATURE] Headings matched with Wikipedia, infobox images and abstracts imported
3
+
1
4
  #2.0.6 / 2014-06-26
2
5
  * [BUGFIX] Match on summary links with hyphens in
3
6
 
data/Gemfile.lock CHANGED
@@ -1,15 +1,18 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- mesh-medical-subject-headings (2.0.6)
4
+ mesh-medical-subject-headings (2.1.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
9
  metaclass (0.0.4)
10
+ mini_portile (0.6.0)
10
11
  minitest (5.0.8)
11
12
  mocha (1.0.0)
12
13
  metaclass (~> 0.0.1)
14
+ nokogiri (1.6.2.1)
15
+ mini_portile (= 0.6.0)
13
16
  rake (10.2.2)
14
17
  ruby-prof (0.14.2)
15
18
  yard (0.8.7.4)
@@ -22,6 +25,7 @@ DEPENDENCIES
22
25
  mesh-medical-subject-headings!
23
26
  minitest (~> 5.0.8)
24
27
  mocha
28
+ nokogiri
25
29
  rake
26
30
  ruby-prof
27
31
  yard
data/MESH.gemspec CHANGED
@@ -24,4 +24,5 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency 'yard'
25
25
  spec.add_development_dependency 'minitest', '~> 5.0.8'
26
26
  spec.add_development_dependency 'ruby-prof'
27
+ spec.add_development_dependency 'nokogiri'
27
28
  end
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/MESH'
4
+ require 'json'
5
+ require 'nokogiri'
6
+
7
# SAX handler for a Wikipedia abstracts dump.
#
# For every <doc> element whose <url> text is a key of the hash passed to the
# constructor, the <title> (minus a leading "Wikipedia: ") and the <abstract>
# are copied onto each link hash stored under that URL. Progress is reported
# on STDERR: '.' for a matched <doc>, '-' for an unmatched one.
class Wikidoc < Nokogiri::XML::SAX::Document

  # @param links_by_uri [Hash{String => Array<Hash>}] link hashes grouped by
  #   URL; matched hashes gain :title and :abstract keys.
  def initialize(links_by_uri)
    super()
    @links_by_uri = links_by_uri
    @elements = []
    @current_title = ''.dup
    @current_abstract = ''.dup
    @current_url = ''.dup
  end

  # Tracks the element stack and starts fresh text buffers.
  def start_element(name, attrs = [])
    @elements.push(name)
    case name
    when 'doc'
      @current_title = ''.dup
      @current_abstract = ''.dup
      @current_url = ''.dup
    # Restart the buffer when a captured element opens so that the last
    # occurrence wins, while still allowing its text to arrive in pieces.
    when 'title' then @current_title = ''.dup
    when 'abstract' then @current_abstract = ''.dup
    when 'url' then @current_url = ''.dup
    end
  end

  # Collects text for the innermost open element. A SAX parser may deliver
  # the text of one node in several calls, so chunks are appended rather
  # than assigned (assigning kept only the final chunk).
  def characters(string)
    case @elements.last
    when 'title' then @current_title << string
    when 'abstract' then @current_abstract << string
    when 'url' then @current_url << string
    end
  end

  # On </doc>, copies the collected title and abstract onto every link
  # registered for the collected URL.
  def end_element(name)
    if name == 'doc'
      links = @links_by_uri[@current_url]
      if links
        STDERR.print '.'
        title = @current_title.gsub(/^Wikipedia: /, '')
        links.each do |l|
          l[:title] = title
          l[:abstract] = @current_abstract
        end
      else
        STDERR.print '-'
      end
    end
    @elements.pop
  end

end
50
+
51
# Reads the gzipped MeSH/Wikipedia link file, enriches every link with the
# title and abstract found in the Wikipedia abstracts dump named by ARGV[0],
# and prints the enriched records (links serialised as JSON) to STDOUT.

dump_path = ARGV[0]
abort "Usage: #{$PROGRAM_NAME} WIKIPEDIA_ABSTRACTS_XML" if dump_path.nil?

filename = File.expand_path("../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__)

unique_id = nil
mh = nil
wikipedia_links = []
by_uri = {}
headings = []

# Stores the record collected so far (if it has a UI) and indexes its links
# by URL so the SAX handler can find them.
finish_record = lambda do
  next if unique_id.nil?

  headings << {
    ui: unique_id,
    mh: mh,
    wikipedia_links: wikipedia_links
  }

  wikipedia_links.each do |wl|
    by_uri[wl[:link]] ||= []
    by_uri[wl[:link]] << wl
  end

  wikipedia_links = []
  unique_id = nil
  mh = nil
end

# Block form closes both the gzip stream and the underlying file.
Zlib::GzipReader.open(filename) do |file|
  file.each_line do |line|
    case line

    when /^\*NEWRECORD$/
      finish_record.call

    when /^UI = (.*)/
      unique_id = Regexp.last_match(1)

    when /^MH = (.*)/
      mh = Regexp.last_match(1)

    when /^WK = (.*)/
      # Expected shape: "score; link" or "score; link; image".
      score, link, image = Regexp.last_match(1).split(';')
      if link.nil?
        warn "Skipping WK line without a link: #{line.chomp}"
        next
      end
      hash = {score: score, link: link.strip}
      hash[:image] = image.strip unless image.nil?
      wikipedia_links << hash

    end
  end
end

# A record is only closed by the *NEWRECORD that follows it, so the last
# record in the file has to be flushed explicitly.
finish_record.call

parser = Nokogiri::XML::SAX::Parser.new(Wikidoc.new(by_uri))
File.open(dump_path) { |dump| parser.parse(dump) }

headings.each do |h|
  puts '*NEWRECORD'
  puts "UI = #{h[:ui]}"
  puts "MH = #{h[:mh]}"
  h[:wikipedia_links].each do |wl|
    puts "WK = #{wl.to_json}"
  end
  puts ''
end
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/MESH'
4
+ require 'net/http'
5
+ require 'nokogiri'
6
+ require 'uri'
7
+
8
# CGI.escape is used by sluggify; require it explicitly instead of relying
# on another library having loaded it.
require 'cgi'

mesh_tree = MESH::Tree.new

# Builds a Wikipedia page slug from an entry term: downcase, replace every
# whitespace character with '_', upcase the first character, URL-escape.
def sluggify(name)
  CGI.escape(name.downcase.gsub(/\s/, '_').capitalize)
end

# For every heading, request the Wikipedia page for each entry term, count
# how many entries resolve to each canonical URL, and print the URL(s) with
# the highest count together with the first infobox image found for them.
mesh_tree.each do |heading|

  candidates = Hash.new(0) # canonical URL => number of entries resolving to it
  images = {}              # canonical URL => first non-empty infobox image src

  heading.entries.each do |entry|

    uri = URI.parse("http://en.wikipedia.org/wiki/#{sluggify(entry)}")
    response = Net::HTTP.get_response(uri)
    if response.code == '200'
      doc = Nokogiri::HTML(response.body)
      canonical = doc.xpath('/html/head/link[@rel="canonical"]/@href').text
      # A page without a canonical link would otherwise be counted under ''.
      unless canonical.empty?
        candidates[canonical] += 1
        image = doc.xpath('(//table[@class="infobox"]//img)[1]/@src').text
        # Only remember real images: storing '' would stop a later entry
        # for the same page from supplying one.
        images[canonical] ||= image unless image.empty?
      end
    end

    sleep 0.1 # throttle requests
  end

  next if candidates.empty?

  score = candidates.values.max
  best_uris = candidates.select { |_, hits| hits == score }.keys

  puts "UI = #{heading.unique_id}"
  puts "MH = #{heading.original_heading}"
  puts "ENTRIES = #{heading.entries.join(' -- ')}"
  best_uris.each do |c|
    puts "WK = #{score}; #{c}"
    puts "WI = #{images[c]}" unless images[c].nil? || images[c].empty?
  end
  puts ''
  STDOUT.flush

end
data/bin/translate ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/MESH'
4
+
5
# Prints every heading as a record whose heading, summary and entry terms
# have been passed through the en-US to en-GB translator.
translator = MESH::Translator.new(MESH::Translator.enus_to_engb)
tree = MESH::Tree.new

tree.each do |heading|
  $stdout.puts '*NEWRECORD'
  $stdout.puts "MH = #{translator.translate(heading.original_heading)}"
  $stdout.puts "MS = #{translator.translate(heading.summary)}"
  heading.entries.each do |entry|
    $stdout.puts "ENTRY = #{translator.translate(entry)}"
  end
  $stdout.puts "UI = #{heading.unique_id}"
  $stdout.puts ''
end
data/lib/MESH.rb CHANGED
@@ -1,6 +1,8 @@
1
+ require 'json'
1
2
  require 'zlib'
2
3
  require 'MESH/version'
3
4
  require 'MESH/tree'
4
5
  require 'MESH/heading'
5
6
  require 'MESH/translator'
6
7
  require 'MESH/classifier'
8
+ require 'MESH/semantic_types'
data/lib/MESH/heading.rb CHANGED
@@ -2,7 +2,7 @@ module MESH
2
2
  class Heading
3
3
 
4
4
  include Comparable
5
- attr_accessor :unique_id, :tree_numbers, :roots, :parents, :children, :useful, :descriptor_class, :default_locale
5
+ attr_accessor :unique_id, :tree_numbers, :roots, :parents, :children, :useful, :descriptor_class, :default_locale, :semantic_types, :wikipedia_links
6
6
  attr_reader :linkified_summary
7
7
 
8
8
  def <=> other
@@ -107,6 +107,7 @@ module MESH
107
107
  @tree = tree
108
108
  @useful = true
109
109
  @tree_numbers = []
110
+ @semantic_types = []
110
111
  @roots = []
111
112
  @parents = []
112
113
  @children = []
@@ -114,6 +115,7 @@ module MESH
114
115
  @original_heading = {}
115
116
  @natural_language_name = {}
116
117
  @summary = {}
118
+ @wikipedia_links = []
117
119
  end
118
120
 
119
121
 
@@ -0,0 +1,146 @@
1
module MESH
  # Lookup table from semantic type identifiers ('T001' ... 'T201') to their
  # names.
  class SemanticTypes

    # Identifier => name. Frozen so the shared table cannot be modified by
    # accident at runtime.
    Types = {
      'T001' => 'Organism',
      'T002' => 'Plant',
      'T003' => 'Alga',
      'T004' => 'Fungus',
      'T005' => 'Virus',
      'T006' => 'Rickettsia or Chlamydia',
      'T007' => 'Bacterium',
      'T008' => 'Animal',
      'T009' => 'Invertebrate',
      'T010' => 'Vertebrate',
      'T011' => 'Amphibian',
      'T012' => 'Bird',
      'T013' => 'Fish',
      'T014' => 'Reptile',
      'T015' => 'Mammal',
      'T016' => 'Human',
      'T017' => 'Anatomical Structure',
      'T018' => 'Embryonic Structure',
      'T019' => 'Congenital Abnormality',
      'T020' => 'Acquired Abnormality',
      'T021' => 'Fully Formed Anatomical Structure',
      'T022' => 'Body System',
      'T023' => 'Body Part, Organ, or Organ Component',
      'T024' => 'Tissue',
      'T025' => 'Cell',
      'T026' => 'Cell Component',
      'T028' => 'Gene or Genome',
      'T029' => 'Body Location or Region',
      'T030' => 'Body Space or Junction',
      'T031' => 'Body Substance',
      'T032' => 'Organism Attribute',
      'T033' => 'Finding',
      'T034' => 'Laboratory or Test Result',
      'T037' => 'Injury or Poisoning',
      'T038' => 'Biologic Function',
      'T039' => 'Physiologic Function',
      'T040' => 'Organism Function',
      'T041' => 'Mental Process',
      'T042' => 'Organ or Tissue Function',
      'T043' => 'Cell Function',
      'T044' => 'Molecular Function',
      'T045' => 'Genetic Function',
      'T046' => 'Pathologic Function',
      'T047' => 'Disease or Syndrome',
      'T048' => 'Mental or Behavioral Dysfunction',
      'T049' => 'Cell or Molecular Dysfunction',
      'T050' => 'Experimental Model of Disease',
      'T051' => 'Event',
      'T052' => 'Activity',
      'T053' => 'Behavior',
      'T054' => 'Social Behavior',
      'T055' => 'Individual Behavior',
      'T056' => 'Daily or Recreational Activity',
      'T057' => 'Occupational Activity',
      'T058' => 'Health Care Activity',
      'T059' => 'Laboratory Procedure',
      'T060' => 'Diagnostic Procedure',
      'T061' => 'Therapeutic or Preventive Procedure',
      'T062' => 'Research Activity',
      'T063' => 'Molecular Biology Research Technique',
      'T064' => 'Governmental or Regulatory Activity',
      'T065' => 'Educational Activity',
      'T066' => 'Machine Activity',
      'T067' => 'Phenomenon or Process',
      'T068' => 'Human-caused Phenomenon or Process',
      'T069' => 'Environmental Effect of Humans',
      'T070' => 'Natural Phenomenon or Process',
      'T071' => 'Entity',
      'T072' => 'Physical Object',
      'T073' => 'Manufactured Object',
      'T074' => 'Medical Device',
      'T075' => 'Research Device',
      'T077' => 'Conceptual Entity',
      'T078' => 'Idea or Concept',
      'T079' => 'Temporal Concept',
      'T080' => 'Qualitative Concept',
      'T081' => 'Quantitative Concept',
      'T082' => 'Spatial Concept',
      'T083' => 'Geographic Area',
      'T085' => 'Molecular Sequence',
      'T086' => 'Nucleotide Sequence',
      'T087' => 'Amino Acid Sequence',
      'T088' => 'Carbohydrate Sequence',
      'T089' => 'Regulation or Law',
      'T090' => 'Occupation or Discipline',
      'T091' => 'Biomedical Occupation or Discipline',
      'T092' => 'Organization',
      'T093' => 'Health Care Related Organization',
      'T094' => 'Professional Society',
      'T095' => 'Self-help or Relief Organization',
      'T096' => 'Group',
      'T097' => 'Professional or Occupational Group',
      'T098' => 'Population Group',
      'T099' => 'Family Group',
      'T100' => 'Age Group',
      'T101' => 'Patient or Disabled Group',
      'T102' => 'Group Attribute',
      'T103' => 'Chemical',
      'T104' => 'Chemical Viewed Structurally',
      'T109' => 'Organic Chemical',
      'T110' => 'Steroid',
      'T111' => 'Eicosanoid',
      'T114' => 'Nucleic Acid, Nucleoside, or Nucleotide',
      'T115' => 'Organophosphorus Compound',
      'T116' => 'Amino Acid, Peptide, or Protein',
      'T118' => 'Carbohydrate',
      'T119' => 'Lipid',
      'T120' => 'Chemical Viewed Functionally',
      'T121' => 'Pharmacologic Substance',
      'T122' => 'Biomedical or Dental Material',
      'T123' => 'Biologically Active Substance',
      'T124' => 'Neuroreactive Substance or Biogenic Amine',
      'T125' => 'Hormone',
      'T126' => 'Enzyme',
      'T127' => 'Vitamin',
      'T129' => 'Immunologic Factor',
      'T130' => 'Indicator, Reagent, or Diagnostic Aid',
      'T131' => 'Hazardous or Poisonous Substance',
      'T167' => 'Substance',
      'T168' => 'Food',
      'T169' => 'Functional Concept',
      'T170' => 'Intellectual Product',
      'T171' => 'Language',
      'T184' => 'Sign or Symptom',
      'T185' => 'Classification',
      'T190' => 'Anatomical Abnormality',
      'T191' => 'Neoplastic Process',
      'T192' => 'Receptor',
      'T194' => 'Archaeon',
      'T195' => 'Antibiotic',
      'T196' => 'Element, Ion, or Isotope',
      'T197' => 'Inorganic Chemical',
      'T200' => 'Clinical Drug',
      'T201' => 'Clinical Attribute'
    }.freeze

    # Returns the name for a semantic type identifier.
    #
    # @param key [String] identifier such as 'T047'
    # @return [String, nil] the type name, or nil when the identifier is unknown
    def self.[](key)
      Types[key]
    end

  end
end
data/lib/MESH/tree.rb CHANGED
@@ -24,7 +24,7 @@ module MESH
24
24
 
25
25
  case
26
26
 
27
- when matches = line.match(/^\*NEWRECORD$/)
27
+ when line.match(/^\*NEWRECORD$/)
28
28
  unless current_heading.unique_id.nil?
29
29
  current_heading.entries.sort!
30
30
  @headings << current_heading
@@ -56,6 +56,9 @@ module MESH
56
56
  when matches = line.match(/^DC = (.*)/)
57
57
  current_heading.descriptor_class = @@descriptor_classes[matches[1].to_i]
58
58
 
59
+ when matches = line.match(/^ST = (.*)/)
60
+ current_heading.semantic_types << MESH::SemanticTypes[matches[1]]
61
+
59
62
  when matches = line.match(/^MH = (.*)/)
60
63
  mh = matches[1]
61
64
  current_heading.set_original_heading(mh)
@@ -107,7 +110,7 @@ module MESH
107
110
 
108
111
  case
109
112
 
110
- when matches = line.match(/^\*NEWRECORD$/)
113
+ when line.match(/^\*NEWRECORD$/)
111
114
  unless unique_id.nil?
112
115
  entries.sort!
113
116
  entries.uniq!
@@ -147,6 +150,45 @@ module MESH
147
150
  @locales << locale
148
151
  end
149
152
 
153
# Attaches Wikipedia links from the bundled d2014.wikipedia.bin.gz file to
# the headings of this tree.
#
# Each record in the file supplies a UI line and zero or more WK lines
# holding a JSON object. For every record whose UI resolves via #find, each
# link's :score is divided by the number of entries of the heading and
# rounded to two decimals, and the links are assigned to
# heading.wikipedia_links. Does nothing when called again after a
# successful load.
#
# @return [true, nil] true after loading, nil when already loaded
def load_wikipedia
  return if @wikipedia_loaded
  filename = File.expand_path("../../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__)

  unique_id = nil
  wikipedia_links = []

  # Hands the links collected for the current record to its heading and
  # resets the per-record state.
  attach_links = lambda do
    next if unique_id.nil?

    if heading = find(unique_id)
      wikipedia_links.each do |wl|
        wl[:score] = (wl[:score].to_f / heading.entries.length.to_f).round(2)
      end
      heading.wikipedia_links = wikipedia_links
    end

    wikipedia_links = []
    unique_id = nil
  end

  # Block form closes the gzip stream and the underlying file handle.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      case line

      when /^\*NEWRECORD$/
        attach_links.call

      when /^UI = (.*)/
        unique_id = Regexp.last_match(1)

      when /^WK = (.*)/
        wikipedia_links << JSON.parse(Regexp.last_match(1), symbolize_names: true)

      end
    end
  end

  # A record is only closed by the *NEWRECORD that follows it, so the last
  # record in the file would otherwise never be attached.
  attach_links.call

  @wikipedia_loaded = true
end
190
+
191
+
150
192
  def linkify_summaries &block
151
193
  @headings.each do |h|
152
194
  h.linkify_summary &block
data/lib/MESH/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Mesh
2
- VERSION = "2.0.6"
2
+ VERSION = "2.1.0"
3
3
  end
@@ -105,6 +105,16 @@ module MESH
105
105
  assert_equal :check_tag, mh.descriptor_class
106
106
  end
107
107
 
108
# Headings report the names of their semantic types.
def test_have_the_correct_semantic_type
  {
    'D000224' => ['Disease or Syndrome'],
    'D005260' => ['Organism Attribute'],
    'D014148' => ['Organic Chemical', 'Pharmacologic Substance']
  }.each do |unique_id, type_names|
    heading = @mesh_tree.find(unique_id)
    assert_equal type_names, heading.semantic_types
  end
end
117
+
108
118
  def test_have_the_correct_original_heading
109
119
  mh = @mesh_tree.find('D000224')
110
120
  assert_equal 'Addison Disease', mh.original_heading
@@ -219,6 +229,95 @@ module MESH
219
229
  assert_equal expected_entries_en.sort, mh.entries(:en_gb)
220
230
  end
221
231
 
232
# Headings with exactly one Wikipedia link expose its URL under :link.
def test_have_a_single_wikipedia_link
  links_by_id = {
    'D000001' => 'http://en.wikipedia.org/wiki/A23187',
    'D000005' => 'http://en.wikipedia.org/wiki/Abdomen',
    'D000082' => 'http://en.wikipedia.org/wiki/Paracetamol'
  }

  links_by_id.each_pair do |unique_id, link|
    heading = @mesh_tree.find(unique_id)
    assert_equal 1, heading.wikipedia_links.length
    assert_equal link, heading.wikipedia_links[0][:link]
  end
end
247
+
248
# Headings with exactly one Wikipedia link expose its score under :score.
def test_have_a_single_wikipedia_score
  scores_by_id = {
    'D000001' => 0.5,
    'D000005' => 1.0,
    'D000082' => 0.35
  }

  scores_by_id.each_pair do |unique_id, score|
    heading = @mesh_tree.find(unique_id)
    assert_equal 1, heading.wikipedia_links.length
    assert_equal score, heading.wikipedia_links[0][:score]
  end
end
262
+
263
# Headings with exactly one Wikipedia link expose its image URL under :image.
def test_have_a_single_wikipedia_image
  images_by_id = {
    'D000001' => 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/17/A23187.png/220px-A23187.png',
    'D000005' => 'http://upload.wikimedia.org/wikipedia/commons/thumb/3/3b/Abdomen_%28PSF%29.jpg/250px-Abdomen_%28PSF%29.jpg',
    'D000082' => 'http://upload.wikimedia.org/wikipedia/commons/thumb/2/29/Paracetamol-skeletal.svg/150px-Paracetamol-skeletal.svg.png'
  }

  images_by_id.each_pair do |unique_id, image|
    heading = @mesh_tree.find(unique_id)
    assert_equal 1, heading.wikipedia_links.length
    assert_equal image, heading.wikipedia_links[0][:image]
  end
end
276
+
277
# Headings with exactly one Wikipedia link expose its abstract under :abstract.
def test_have_a_single_wikipedia_abstract
  abstracts_by_id = {
    'D000001' => '| CAS_number = 52665-69-7',
    'D000005' => 'The abdomen (less formally called the belly, stomach, or tummy), in vertebrates such as mammals, constitutes the part of the body between the thorax (chest) and pelvis. The region enclosed by the abdomen is termed the abdominal cavity.',
    'D000082' => '| MedlinePlus = a681004'
  }

  abstracts_by_id.each_pair do |unique_id, abstract|
    heading = @mesh_tree.find(unique_id)
    assert_equal 1, heading.wikipedia_links.length
    assert_equal abstract, heading.wikipedia_links[0][:abstract]
  end
end
290
+
291
# A heading with several Wikipedia links lists all their URLs, in order.
def test_have_more_than_one_wikipedia_link
  expected_links = [
    'http://en.wikipedia.org/wiki/Sodium_acetrizoate',
    'http://en.wikipedia.org/wiki/Acetrizoic_acid'
  ]
  heading = @mesh_tree.find('D000100')
  actual_links = heading.wikipedia_links.map { |link| link[:link] }
  assert_equal expected_links, actual_links
end
299
+
300
# A heading with several Wikipedia links lists a score for each, in order.
def test_have_more_than_one_wikipedia_score
  heading = @mesh_tree.find('D000100')
  actual_scores = heading.wikipedia_links.map { |link| link[:score] }
  assert_equal [0.09, 0.09], actual_scores
end
305
+
306
# A heading with several Wikipedia links lists an image for each, in order.
def test_have_more_than_one_wikipedia_image
  expected_images = [
    'http://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Sodium_acetrizoate.svg/150px-Sodium_acetrizoate.svg.png',
    'http://upload.wikimedia.org/wikipedia/commons/thumb/2/26/Acetrizoic_acid.png/220px-Acetrizoic_acid.png'
  ]
  heading = @mesh_tree.find('D000100')
  actual_images = heading.wikipedia_links.map { |link| link[:image] }
  assert_equal expected_images, actual_images
end
314
+
315
# A heading with several Wikipedia links lists an abstract for each, in order.
def test_have_more_than_one_wikipedia_abstract
  expected_abstracts = [
    '| CAS_number = 129-63-5',
    '| CAS_number = 85-36-9'
  ]
  heading = @mesh_tree.find('D000100')
  actual_abstracts = heading.wikipedia_links.map { |link| link[:abstract] }
  assert_equal expected_abstracts, actual_abstracts
end
320
+
222
321
  def test_have_the_correct_parent
223
322
  mh = @mesh_tree.find('D000001')
224
323
  assert_equal 1, mh.parents.length
@@ -578,6 +677,7 @@ module MESH
578
677
  def setup
579
678
  @@mesh_tree ||= MESH::Tree.new
580
679
  @@mesh_tree.load_translation(:en_gb)
680
+ @@mesh_tree.load_wikipedia
581
681
  @mesh_tree = @@mesh_tree
582
682
  @example_text ||= 'Leukaemia in Downs Syndrome
583
683
  Overview
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mesh-medical-subject-headings
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.6
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-26 00:00:00.000000000 Z
11
+ date: 2014-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - '>='
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: nokogiri
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  description: A ruby gem containing MeSH subject headings (https://www.nlm.nih.gov/mesh/)
98
112
  for use in classifying and entity recognition.
99
113
  email:
@@ -111,14 +125,19 @@ files:
111
125
  - MESH.gemspec
112
126
  - README.md
113
127
  - Rakefile
128
+ - bin/extract_wikipedia_abstracts
129
+ - bin/match_wikipedia
130
+ - bin/translate
114
131
  - data/mesh_data_2014/c2014.bin.gz
115
132
  - data/mesh_data_2014/d2014.bin.gz
116
133
  - data/mesh_data_2014/d2014.en_gb.bin.gz
134
+ - data/mesh_data_2014/d2014.wikipedia.bin.gz
117
135
  - data/mesh_data_2014/mtrees2014.bin.gz
118
136
  - data/mesh_data_2014/q2014.bin.gz
119
137
  - lib/MESH.rb
120
138
  - lib/MESH/classifier.rb
121
139
  - lib/MESH/heading.rb
140
+ - lib/MESH/semantic_types.rb
122
141
  - lib/MESH/translator.rb
123
142
  - lib/MESH/tree.rb
124
143
  - lib/MESH/version.rb