mesh-medical-subject-headings 2.0.6 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +5 -1
- data/MESH.gemspec +1 -0
- data/bin/extract_wikipedia_abstracts +111 -0
- data/bin/match_wikipedia +53 -0
- data/bin/translate +15 -0
- data/data/mesh_data_2014/d2014.wikipedia.bin.gz +0 -0
- data/lib/MESH.rb +2 -0
- data/lib/MESH/heading.rb +3 -1
- data/lib/MESH/semantic_types.rb +146 -0
- data/lib/MESH/tree.rb +44 -2
- data/lib/MESH/version.rb +1 -1
- data/test/mesh_core_test.rb +100 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a2aca8f7e1eceac511966cc2a886258c2bc81c19
|
4
|
+
data.tar.gz: 1c3f83c62564d794239a05084b099dd0160da13b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b06e343f36965d05b05a4689abfe316430f0b4a069432e51609dbbf15b0426edcbad038f129410b6b20d561617472cec601aa1e14c7589683149ec9a6b08183d
|
7
|
+
data.tar.gz: 7b8dd6e676a376ce773100cec77e7e1ab923492016d375be9af071ac08895bfa5a9e9c4f94e31376ef4a5f2205308638fec9228c940758c85829f58027238b0d
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,15 +1,18 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
mesh-medical-subject-headings (2.0.6)
|
4
|
+
mesh-medical-subject-headings (2.1.0)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
metaclass (0.0.4)
|
10
|
+
mini_portile (0.6.0)
|
10
11
|
minitest (5.0.8)
|
11
12
|
mocha (1.0.0)
|
12
13
|
metaclass (~> 0.0.1)
|
14
|
+
nokogiri (1.6.2.1)
|
15
|
+
mini_portile (= 0.6.0)
|
13
16
|
rake (10.2.2)
|
14
17
|
ruby-prof (0.14.2)
|
15
18
|
yard (0.8.7.4)
|
@@ -22,6 +25,7 @@ DEPENDENCIES
|
|
22
25
|
mesh-medical-subject-headings!
|
23
26
|
minitest (~> 5.0.8)
|
24
27
|
mocha
|
28
|
+
nokogiri
|
25
29
|
rake
|
26
30
|
ruby-prof
|
27
31
|
yard
|
data/MESH.gemspec
CHANGED
data/bin/extract_wikipedia_abstracts
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/MESH'
|
4
|
+
require 'json'
|
5
|
+
require 'nokogiri'
|
6
|
+
|
7
|
+
class Wikidoc < Nokogiri::XML::SAX::Document
|
8
|
+
|
9
|
+
def initialize(links_by_uri)
|
10
|
+
super()
|
11
|
+
@links_by_uri = links_by_uri
|
12
|
+
end
|
13
|
+
|
14
|
+
def start_element name, attrs = []
|
15
|
+
@elements ||= []
|
16
|
+
@elements.push(name)
|
17
|
+
if name == 'doc'
|
18
|
+
@current_title = ''
|
19
|
+
@current_abstract = ''
|
20
|
+
@current_url = ''
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def characters string
|
25
|
+
if @elements.last == 'title'
|
26
|
+
@current_title = string
|
27
|
+
elsif @elements.last == 'abstract'
|
28
|
+
@current_abstract = string
|
29
|
+
elsif @elements.last == 'url'
|
30
|
+
@current_url = string
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def end_element name
|
35
|
+
if name == 'doc' && @links_by_uri[@current_url]
|
36
|
+
STDERR.print '.'
|
37
|
+
@links_by_uri[@current_url].each do |l|
|
38
|
+
title = @current_title.gsub(/^Wikipedia: /, '')
|
39
|
+
l[:title] = title
|
40
|
+
l[:abstract] = @current_abstract
|
41
|
+
# puts l
|
42
|
+
end
|
43
|
+
elsif name == 'doc'
|
44
|
+
STDERR.print '-'
|
45
|
+
end
|
46
|
+
@elements.pop
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
filename = File.expand_path("../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__)
|
52
|
+
gzipped_file = File.open(filename)
|
53
|
+
file = Zlib::GzipReader.new(gzipped_file)
|
54
|
+
|
55
|
+
unique_id = nil
|
56
|
+
mh = nil
|
57
|
+
wikipedia_links = []
|
58
|
+
by_uri = {}
|
59
|
+
headings = []
|
60
|
+
|
61
|
+
file.each_line do |line|
|
62
|
+
|
63
|
+
case
|
64
|
+
|
65
|
+
when line.match(/^\*NEWRECORD$/)
|
66
|
+
unless unique_id.nil?
|
67
|
+
hash = {
|
68
|
+
ui: unique_id,
|
69
|
+
mh: mh,
|
70
|
+
wikipedia_links: wikipedia_links
|
71
|
+
}
|
72
|
+
|
73
|
+
headings << hash
|
74
|
+
|
75
|
+
wikipedia_links.each do |wl|
|
76
|
+
by_uri[wl[:link]] ||= []
|
77
|
+
by_uri[wl[:link]] << wl
|
78
|
+
end
|
79
|
+
|
80
|
+
wikipedia_links = []
|
81
|
+
unique_id = nil
|
82
|
+
mh = nil
|
83
|
+
end
|
84
|
+
|
85
|
+
when matches = line.match(/^UI = (.*)/)
|
86
|
+
unique_id = matches[1]
|
87
|
+
|
88
|
+
when matches = line.match(/^MH = (.*)/)
|
89
|
+
mh = matches[1]
|
90
|
+
|
91
|
+
when matches = line.match(/^WK = (.*)/)
|
92
|
+
score, link, image = matches[1].split ';'
|
93
|
+
hash = {score: score, link: link.strip}
|
94
|
+
hash[:image] = image.strip unless image.nil?
|
95
|
+
wikipedia_links << hash
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
99
|
+
|
100
|
+
parser = Nokogiri::XML::SAX::Parser.new(Wikidoc.new(by_uri))
|
101
|
+
parser.parse(File.open(ARGV[0]))
|
102
|
+
|
103
|
+
headings.each do |h|
|
104
|
+
puts '*NEWRECORD'
|
105
|
+
puts "UI = #{h[:ui]}"
|
106
|
+
puts "MH = #{h[:mh]}"
|
107
|
+
h[:wikipedia_links].each do |wl|
|
108
|
+
puts "WK = #{wl.to_json}"
|
109
|
+
end
|
110
|
+
puts ''
|
111
|
+
end
|
data/bin/match_wikipedia
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/MESH'
|
4
|
+
require 'net/http'
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
mesh_tree = MESH::Tree.new
|
9
|
+
|
10
|
+
def sluggify(name)
|
11
|
+
CGI.escape(name.downcase.gsub(/\s/, '_').capitalize)
|
12
|
+
end
|
13
|
+
|
14
|
+
count = 0
|
15
|
+
mesh_tree.each do |h|
|
16
|
+
count += 1
|
17
|
+
|
18
|
+
candidates = {}
|
19
|
+
images = {}
|
20
|
+
h.entries.each do |e|
|
21
|
+
|
22
|
+
slug = sluggify(e)
|
23
|
+
uri = URI.parse("http://en.wikipedia.org/wiki/#{slug}")
|
24
|
+
response = Net::HTTP.get_response(uri)
|
25
|
+
if response.code == '200'
|
26
|
+
doc = Nokogiri::HTML(response.body)
|
27
|
+
# heading = doc.css('#firstHeading > span')
|
28
|
+
canonical = doc.xpath('/html/head/link[@rel="canonical"]/@href')
|
29
|
+
candidates[canonical.text] ||= 0
|
30
|
+
candidates[canonical.text] += 1
|
31
|
+
img_node = doc.xpath('(//table[@class="infobox"]//img)[1]/@src')
|
32
|
+
images[canonical.text] ||= img_node.text unless img_node.nil?
|
33
|
+
end
|
34
|
+
|
35
|
+
sleep 0.1
|
36
|
+
end
|
37
|
+
|
38
|
+
best_candidates = candidates.reduce({}) { |h, (k, v)| (h[v] ||= []) << k; h }.max
|
39
|
+
if best_candidates && !best_candidates.empty?
|
40
|
+
score, candidates = best_candidates
|
41
|
+
puts "UI = #{h.unique_id}"
|
42
|
+
puts "MH = #{h.original_heading}"
|
43
|
+
puts "ENTRIES = #{h.entries.join(' -- ')}"
|
44
|
+
candidates.each do |c|
|
45
|
+
puts "WK = #{score}; #{c}"
|
46
|
+
puts "WI = #{images[c]}" unless images[c].nil? || images[c].empty?
|
47
|
+
end
|
48
|
+
puts ''
|
49
|
+
STDOUT.flush
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
end
|
data/bin/translate
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require_relative '../lib/MESH'
|
4
|
+
|
5
|
+
mesh_tree = MESH::Tree.new
|
6
|
+
tr = MESH::Translator.new(MESH::Translator.enus_to_engb)
|
7
|
+
|
8
|
+
mesh_tree.each do |h|
|
9
|
+
puts "*NEWRECORD"
|
10
|
+
puts "MH = #{tr.translate(h.original_heading)}"
|
11
|
+
puts "MS = #{tr.translate(h.summary)}"
|
12
|
+
h.entries.each { |e| puts "ENTRY = #{tr.translate(e)}" }
|
13
|
+
puts "UI = #{h.unique_id}"
|
14
|
+
puts ''
|
15
|
+
end
|
data/data/mesh_data_2014/d2014.wikipedia.bin.gz
ADDED
Binary file
|
data/lib/MESH.rb
CHANGED
data/lib/MESH/heading.rb
CHANGED
@@ -2,7 +2,7 @@ module MESH
|
|
2
2
|
class Heading
|
3
3
|
|
4
4
|
include Comparable
|
5
|
-
attr_accessor :unique_id, :tree_numbers, :roots, :parents, :children, :useful, :descriptor_class, :default_locale
|
5
|
+
attr_accessor :unique_id, :tree_numbers, :roots, :parents, :children, :useful, :descriptor_class, :default_locale, :semantic_types, :wikipedia_links
|
6
6
|
attr_reader :linkified_summary
|
7
7
|
|
8
8
|
def <=> other
|
@@ -107,6 +107,7 @@ module MESH
|
|
107
107
|
@tree = tree
|
108
108
|
@useful = true
|
109
109
|
@tree_numbers = []
|
110
|
+
@semantic_types = []
|
110
111
|
@roots = []
|
111
112
|
@parents = []
|
112
113
|
@children = []
|
@@ -114,6 +115,7 @@ module MESH
|
|
114
115
|
@original_heading = {}
|
115
116
|
@natural_language_name = {}
|
116
117
|
@summary = {}
|
118
|
+
@wikipedia_links = []
|
117
119
|
end
|
118
120
|
|
119
121
|
|
data/lib/MESH/semantic_types.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
module MESH
|
2
|
+
class SemanticTypes
|
3
|
+
|
4
|
+
def self.[](key)
|
5
|
+
Types[key]
|
6
|
+
end
|
7
|
+
|
8
|
+
Types = {
|
9
|
+
'T001' => 'Organism',
|
10
|
+
'T002' => 'Plant',
|
11
|
+
'T003' => 'Alga',
|
12
|
+
'T004' => 'Fungus',
|
13
|
+
'T005' => 'Virus',
|
14
|
+
'T006' => 'Rickettsia or Chlamydia',
|
15
|
+
'T007' => 'Bacterium',
|
16
|
+
'T008' => 'Animal',
|
17
|
+
'T009' => 'Invertebrate',
|
18
|
+
'T010' => 'Vertebrate',
|
19
|
+
'T011' => 'Amphibian',
|
20
|
+
'T012' => 'Bird',
|
21
|
+
'T013' => 'Fish',
|
22
|
+
'T014' => 'Reptile',
|
23
|
+
'T015' => 'Mammal',
|
24
|
+
'T016' => 'Human',
|
25
|
+
'T017' => 'Anatomical Structure',
|
26
|
+
'T018' => 'Embryonic Structure',
|
27
|
+
'T019' => 'Congenital Abnormality',
|
28
|
+
'T020' => 'Acquired Abnormality',
|
29
|
+
'T021' => 'Fully Formed Anatomical Structure',
|
30
|
+
'T022' => 'Body System',
|
31
|
+
'T023' => 'Body Part, Organ, or Organ Component',
|
32
|
+
'T024' => 'Tissue',
|
33
|
+
'T025' => 'Cell',
|
34
|
+
'T026' => 'Cell Component',
|
35
|
+
'T028' => 'Gene or Genome',
|
36
|
+
'T029' => 'Body Location or Region',
|
37
|
+
'T030' => 'Body Space or Junction',
|
38
|
+
'T031' => 'Body Substance',
|
39
|
+
'T032' => 'Organism Attribute',
|
40
|
+
'T033' => 'Finding',
|
41
|
+
'T034' => 'Laboratory or Test Result',
|
42
|
+
'T037' => 'Injury or Poisoning',
|
43
|
+
'T038' => 'Biologic Function',
|
44
|
+
'T039' => 'Physiologic Function',
|
45
|
+
'T040' => 'Organism Function',
|
46
|
+
'T041' => 'Mental Process',
|
47
|
+
'T042' => 'Organ or Tissue Function',
|
48
|
+
'T043' => 'Cell Function',
|
49
|
+
'T044' => 'Molecular Function',
|
50
|
+
'T045' => 'Genetic Function',
|
51
|
+
'T046' => 'Pathologic Function',
|
52
|
+
'T047' => 'Disease or Syndrome',
|
53
|
+
'T048' => 'Mental or Behavioral Dysfunction',
|
54
|
+
'T049' => 'Cell or Molecular Dysfunction',
|
55
|
+
'T050' => 'Experimental Model of Disease',
|
56
|
+
'T051' => 'Event',
|
57
|
+
'T052' => 'Activity',
|
58
|
+
'T053' => 'Behavior',
|
59
|
+
'T054' => 'Social Behavior',
|
60
|
+
'T055' => 'Individual Behavior',
|
61
|
+
'T056' => 'Daily or Recreational Activity',
|
62
|
+
'T057' => 'Occupational Activity',
|
63
|
+
'T058' => 'Health Care Activity',
|
64
|
+
'T059' => 'Laboratory Procedure',
|
65
|
+
'T060' => 'Diagnostic Procedure',
|
66
|
+
'T061' => 'Therapeutic or Preventive Procedure',
|
67
|
+
'T062' => 'Research Activity',
|
68
|
+
'T063' => 'Molecular Biology Research Technique',
|
69
|
+
'T064' => 'Governmental or Regulatory Activity',
|
70
|
+
'T065' => 'Educational Activity',
|
71
|
+
'T066' => 'Machine Activity',
|
72
|
+
'T067' => 'Phenomenon or Process',
|
73
|
+
'T068' => 'Human-caused Phenomenon or Process',
|
74
|
+
'T069' => 'Environmental Effect of Humans',
|
75
|
+
'T070' => 'Natural Phenomenon or Process',
|
76
|
+
'T071' => 'Entity',
|
77
|
+
'T072' => 'Physical Object',
|
78
|
+
'T073' => 'Manufactured Object',
|
79
|
+
'T074' => 'Medical Device',
|
80
|
+
'T075' => 'Research Device',
|
81
|
+
'T077' => 'Conceptual Entity',
|
82
|
+
'T078' => 'Idea or Concept',
|
83
|
+
'T079' => 'Temporal Concept',
|
84
|
+
'T080' => 'Qualitative Concept',
|
85
|
+
'T081' => 'Quantitative Concept',
|
86
|
+
'T082' => 'Spatial Concept',
|
87
|
+
'T083' => 'Geographic Area',
|
88
|
+
'T085' => 'Molecular Sequence',
|
89
|
+
'T086' => 'Nucleotide Sequence',
|
90
|
+
'T087' => 'Amino Acid Sequence',
|
91
|
+
'T088' => 'Carbohydrate Sequence',
|
92
|
+
'T089' => 'Regulation or Law',
|
93
|
+
'T090' => 'Occupation or Discipline',
|
94
|
+
'T091' => 'Biomedical Occupation or Discipline',
|
95
|
+
'T092' => 'Organization',
|
96
|
+
'T093' => 'Health Care Related Organization',
|
97
|
+
'T094' => 'Professional Society',
|
98
|
+
'T095' => 'Self-help or Relief Organization',
|
99
|
+
'T096' => 'Group',
|
100
|
+
'T097' => 'Professional or Occupational Group',
|
101
|
+
'T098' => 'Population Group',
|
102
|
+
'T099' => 'Family Group',
|
103
|
+
'T100' => 'Age Group',
|
104
|
+
'T101' => 'Patient or Disabled Group',
|
105
|
+
'T102' => 'Group Attribute',
|
106
|
+
'T103' => 'Chemical',
|
107
|
+
'T104' => 'Chemical Viewed Structurally',
|
108
|
+
'T109' => 'Organic Chemical',
|
109
|
+
'T110' => 'Steroid',
|
110
|
+
'T111' => 'Eicosanoid',
|
111
|
+
'T114' => 'Nucleic Acid, Nucleoside, or Nucleotide',
|
112
|
+
'T115' => 'Organophosphorus Compound',
|
113
|
+
'T116' => 'Amino Acid, Peptide, or Protein',
|
114
|
+
'T118' => 'Carbohydrate',
|
115
|
+
'T119' => 'Lipid',
|
116
|
+
'T120' => 'Chemical Viewed Functionally',
|
117
|
+
'T121' => 'Pharmacologic Substance',
|
118
|
+
'T122' => 'Biomedical or Dental Material',
|
119
|
+
'T123' => 'Biologically Active Substance',
|
120
|
+
'T124' => 'Neuroreactive Substance or Biogenic Amine',
|
121
|
+
'T125' => 'Hormone',
|
122
|
+
'T126' => 'Enzyme',
|
123
|
+
'T127' => 'Vitamin',
|
124
|
+
'T129' => 'Immunologic Factor',
|
125
|
+
'T130' => 'Indicator, Reagent, or Diagnostic Aid',
|
126
|
+
'T131' => 'Hazardous or Poisonous Substance',
|
127
|
+
'T167' => 'Substance',
|
128
|
+
'T168' => 'Food',
|
129
|
+
'T169' => 'Functional Concept',
|
130
|
+
'T170' => 'Intellectual Product',
|
131
|
+
'T171' => 'Language',
|
132
|
+
'T184' => 'Sign or Symptom',
|
133
|
+
'T185' => 'Classification',
|
134
|
+
'T190' => 'Anatomical Abnormality',
|
135
|
+
'T191' => 'Neoplastic Process',
|
136
|
+
'T192' => 'Receptor',
|
137
|
+
'T194' => 'Archaeon',
|
138
|
+
'T195' => 'Antibiotic',
|
139
|
+
'T196' => 'Element, Ion, or Isotope',
|
140
|
+
'T197' => 'Inorganic Chemical',
|
141
|
+
'T200' => 'Clinical Drug',
|
142
|
+
'T201' => 'Clinical Attribute'
|
143
|
+
}
|
144
|
+
|
145
|
+
end
|
146
|
+
end
|
data/lib/MESH/tree.rb
CHANGED
@@ -24,7 +24,7 @@ module MESH
|
|
24
24
|
|
25
25
|
case
|
26
26
|
|
27
|
-
when
|
27
|
+
when line.match(/^\*NEWRECORD$/)
|
28
28
|
unless current_heading.unique_id.nil?
|
29
29
|
current_heading.entries.sort!
|
30
30
|
@headings << current_heading
|
@@ -56,6 +56,9 @@ module MESH
|
|
56
56
|
when matches = line.match(/^DC = (.*)/)
|
57
57
|
current_heading.descriptor_class = @@descriptor_classes[matches[1].to_i]
|
58
58
|
|
59
|
+
when matches = line.match(/^ST = (.*)/)
|
60
|
+
current_heading.semantic_types << MESH::SemanticTypes[matches[1]]
|
61
|
+
|
59
62
|
when matches = line.match(/^MH = (.*)/)
|
60
63
|
mh = matches[1]
|
61
64
|
current_heading.set_original_heading(mh)
|
@@ -107,7 +110,7 @@ module MESH
|
|
107
110
|
|
108
111
|
case
|
109
112
|
|
110
|
-
when
|
113
|
+
when line.match(/^\*NEWRECORD$/)
|
111
114
|
unless unique_id.nil?
|
112
115
|
entries.sort!
|
113
116
|
entries.uniq!
|
@@ -147,6 +150,45 @@ module MESH
|
|
147
150
|
@locales << locale
|
148
151
|
end
|
149
152
|
|
153
|
+
def load_wikipedia
|
154
|
+
return if @wikipedia_loaded
|
155
|
+
filename = File.expand_path("../../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__)
|
156
|
+
gzipped_file = File.open(filename)
|
157
|
+
file = Zlib::GzipReader.new(gzipped_file)
|
158
|
+
|
159
|
+
unique_id = nil
|
160
|
+
wikipedia_links = []
|
161
|
+
file.each_line do |line|
|
162
|
+
|
163
|
+
case
|
164
|
+
|
165
|
+
when line.match(/^\*NEWRECORD$/)
|
166
|
+
unless unique_id.nil?
|
167
|
+
if heading = find(unique_id)
|
168
|
+
wikipedia_links.each do |wl|
|
169
|
+
wl[:score] = (wl[:score].to_f / heading.entries.length.to_f).round(2)
|
170
|
+
end
|
171
|
+
heading.wikipedia_links = wikipedia_links
|
172
|
+
end
|
173
|
+
|
174
|
+
wikipedia_links = []
|
175
|
+
unique_id = nil
|
176
|
+
end
|
177
|
+
|
178
|
+
when matches = line.match(/^UI = (.*)/)
|
179
|
+
unique_id = matches[1]
|
180
|
+
|
181
|
+
when matches = line.match(/^WK = (.*)/)
|
182
|
+
hash = JSON.parse(matches[1], symbolize_names: true)
|
183
|
+
wikipedia_links << hash
|
184
|
+
|
185
|
+
end
|
186
|
+
|
187
|
+
end
|
188
|
+
@wikipedia_loaded = true
|
189
|
+
end
|
190
|
+
|
191
|
+
|
150
192
|
def linkify_summaries &block
|
151
193
|
@headings.each do |h|
|
152
194
|
h.linkify_summary &block
|
data/lib/MESH/version.rb
CHANGED
data/test/mesh_core_test.rb
CHANGED
@@ -105,6 +105,16 @@ module MESH
|
|
105
105
|
assert_equal :check_tag, mh.descriptor_class
|
106
106
|
end
|
107
107
|
|
108
|
+
def test_have_the_correct_semantic_type
|
109
|
+
mh = @mesh_tree.find('D000224')
|
110
|
+
assert_equal ['Disease or Syndrome'], mh.semantic_types
|
111
|
+
mh = @mesh_tree.find('D005260')
|
112
|
+
assert_equal ['Organism Attribute'], mh.semantic_types
|
113
|
+
mh = @mesh_tree.find('D014148')
|
114
|
+
assert_equal ['Organic Chemical', 'Pharmacologic Substance'], mh.semantic_types
|
115
|
+
|
116
|
+
end
|
117
|
+
|
108
118
|
def test_have_the_correct_original_heading
|
109
119
|
mh = @mesh_tree.find('D000224')
|
110
120
|
assert_equal 'Addison Disease', mh.original_heading
|
@@ -219,6 +229,95 @@ module MESH
|
|
219
229
|
assert_equal expected_entries_en.sort, mh.entries(:en_gb)
|
220
230
|
end
|
221
231
|
|
232
|
+
def test_have_a_single_wikipedia_link
|
233
|
+
|
234
|
+
expected = {
|
235
|
+
'D000001' => 'http://en.wikipedia.org/wiki/A23187',
|
236
|
+
'D000005' => 'http://en.wikipedia.org/wiki/Abdomen',
|
237
|
+
'D000082' => 'http://en.wikipedia.org/wiki/Paracetamol'
|
238
|
+
}
|
239
|
+
|
240
|
+
expected.each do |id, expected_link|
|
241
|
+
mh = @mesh_tree.find(id)
|
242
|
+
assert_equal 1, mh.wikipedia_links.length
|
243
|
+
assert_equal expected_link, mh.wikipedia_links[0][:link]
|
244
|
+
end
|
245
|
+
|
246
|
+
end
|
247
|
+
|
248
|
+
def test_have_a_single_wikipedia_score
|
249
|
+
expected = {
|
250
|
+
'D000001' => 0.5,
|
251
|
+
'D000005' => 1.0,
|
252
|
+
'D000082' => 0.35
|
253
|
+
}
|
254
|
+
|
255
|
+
expected.each do |id, expected_score|
|
256
|
+
mh = @mesh_tree.find(id)
|
257
|
+
assert_equal 1, mh.wikipedia_links.length
|
258
|
+
assert_equal expected_score, mh.wikipedia_links[0][:score]
|
259
|
+
end
|
260
|
+
|
261
|
+
end
|
262
|
+
|
263
|
+
def test_have_a_single_wikipedia_image
|
264
|
+
expected = {
|
265
|
+
'D000001' => 'http://upload.wikimedia.org/wikipedia/commons/thumb/1/17/A23187.png/220px-A23187.png',
|
266
|
+
'D000005' => 'http://upload.wikimedia.org/wikipedia/commons/thumb/3/3b/Abdomen_%28PSF%29.jpg/250px-Abdomen_%28PSF%29.jpg',
|
267
|
+
'D000082' => 'http://upload.wikimedia.org/wikipedia/commons/thumb/2/29/Paracetamol-skeletal.svg/150px-Paracetamol-skeletal.svg.png'
|
268
|
+
}
|
269
|
+
|
270
|
+
expected.each do |id, expected_image|
|
271
|
+
mh = @mesh_tree.find(id)
|
272
|
+
assert_equal 1, mh.wikipedia_links.length
|
273
|
+
assert_equal expected_image, mh.wikipedia_links[0][:image]
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
def test_have_a_single_wikipedia_abstract
|
278
|
+
expected = {
|
279
|
+
'D000001' => '| CAS_number = 52665-69-7',
|
280
|
+
'D000005' => 'The abdomen (less formally called the belly, stomach, or tummy), in vertebrates such as mammals, constitutes the part of the body between the thorax (chest) and pelvis. The region enclosed by the abdomen is termed the abdominal cavity.',
|
281
|
+
'D000082' => '| MedlinePlus = a681004'
|
282
|
+
}
|
283
|
+
|
284
|
+
expected.each do |id, expected_abstract|
|
285
|
+
mh = @mesh_tree.find(id)
|
286
|
+
assert_equal 1, mh.wikipedia_links.length
|
287
|
+
assert_equal expected_abstract, mh.wikipedia_links[0][:abstract]
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
def test_have_more_than_one_wikipedia_link
|
292
|
+
mh = @mesh_tree.find('D000100')
|
293
|
+
expected = %w(
|
294
|
+
http://en.wikipedia.org/wiki/Sodium_acetrizoate
|
295
|
+
http://en.wikipedia.org/wiki/Acetrizoic_acid
|
296
|
+
)
|
297
|
+
assert_equal expected, mh.wikipedia_links.map { |l| l[:link] }
|
298
|
+
end
|
299
|
+
|
300
|
+
def test_have_more_than_one_wikipedia_score
|
301
|
+
mh = @mesh_tree.find('D000100')
|
302
|
+
expected = [0.09, 0.09]
|
303
|
+
assert_equal expected, mh.wikipedia_links.map { |l| l[:score] }
|
304
|
+
end
|
305
|
+
|
306
|
+
def test_have_more_than_one_wikipedia_image
|
307
|
+
mh = @mesh_tree.find('D000100')
|
308
|
+
expected = %w(
|
309
|
+
http://upload.wikimedia.org/wikipedia/commons/thumb/4/4d/Sodium_acetrizoate.svg/150px-Sodium_acetrizoate.svg.png
|
310
|
+
http://upload.wikimedia.org/wikipedia/commons/thumb/2/26/Acetrizoic_acid.png/220px-Acetrizoic_acid.png
|
311
|
+
)
|
312
|
+
assert_equal expected, mh.wikipedia_links.map { |l| l[:image] }
|
313
|
+
end
|
314
|
+
|
315
|
+
def test_have_more_than_one_wikipedia_abstract
|
316
|
+
mh = @mesh_tree.find('D000100')
|
317
|
+
expected = ['| CAS_number = 129-63-5', '| CAS_number = 85-36-9']
|
318
|
+
assert_equal expected, mh.wikipedia_links.map { |l| l[:abstract] }
|
319
|
+
end
|
320
|
+
|
222
321
|
def test_have_the_correct_parent
|
223
322
|
mh = @mesh_tree.find('D000001')
|
224
323
|
assert_equal 1, mh.parents.length
|
@@ -578,6 +677,7 @@ module MESH
|
|
578
677
|
def setup
|
579
678
|
@@mesh_tree ||= MESH::Tree.new
|
580
679
|
@@mesh_tree.load_translation(:en_gb)
|
680
|
+
@@mesh_tree.load_wikipedia
|
581
681
|
@mesh_tree = @@mesh_tree
|
582
682
|
@example_text ||= 'Leukaemia in Downs Syndrome
|
583
683
|
Overview
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mesh-medical-subject-headings
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.6
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rob Styles
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - '>='
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: nokogiri
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description: A ruby gem containing MeSH subject headings (https://www.nlm.nih.gov/mesh/)
|
98
112
|
for use in classifying and entity recognition.
|
99
113
|
email:
|
@@ -111,14 +125,19 @@ files:
|
|
111
125
|
- MESH.gemspec
|
112
126
|
- README.md
|
113
127
|
- Rakefile
|
128
|
+
- bin/extract_wikipedia_abstracts
|
129
|
+
- bin/match_wikipedia
|
130
|
+
- bin/translate
|
114
131
|
- data/mesh_data_2014/c2014.bin.gz
|
115
132
|
- data/mesh_data_2014/d2014.bin.gz
|
116
133
|
- data/mesh_data_2014/d2014.en_gb.bin.gz
|
134
|
+
- data/mesh_data_2014/d2014.wikipedia.bin.gz
|
117
135
|
- data/mesh_data_2014/mtrees2014.bin.gz
|
118
136
|
- data/mesh_data_2014/q2014.bin.gz
|
119
137
|
- lib/MESH.rb
|
120
138
|
- lib/MESH/classifier.rb
|
121
139
|
- lib/MESH/heading.rb
|
140
|
+
- lib/MESH/semantic_types.rb
|
122
141
|
- lib/MESH/translator.rb
|
123
142
|
- lib/MESH/tree.rb
|
124
143
|
- lib/MESH/version.rb
|