mesh-medical-subject-headings 2.2.1 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd1eb2f1f911abc7f2421de83fb5e7d6c83dc3cb
4
- data.tar.gz: 70b0081953e3cdfb68335d8cd33d4841b6424205
3
+ metadata.gz: 05dc9df68ff77e4f8a3b89e41f4b67a3da727332
4
+ data.tar.gz: 8f57de7c848aa7e112ae9b3ecf0d34db871eca28
5
5
  SHA512:
6
- metadata.gz: 8b4d63674735db2a1675056d65446fadec07be1b3f81d0d157008c5073f8c1733b165a2c785f220e9578c8e3e53173657c621abe4ba97da9821fcbc79ede1280
7
- data.tar.gz: cd889c0a8a95d462d4df42b26980ce3506d9e2e52a4ad973aac0f45dd0e1a63af3c5a241e812cdf134e63d577074a69766991f937d005e812863f8890cba3709
6
+ metadata.gz: 345a7a0f904a25a0518a53f7babf30f13a1e4cb05e0862fe79eb1ae1a054b4224803bb8ba4d55dbe1f49575f0fa2f6ed6c5d32ec9163c7f195b025e59fa3ee6d
7
+ data.tar.gz: 6090811709b5570512708496c2bf468f6ae62ae4b551f95df54e3101b94dc93ac763133d8b5e6b91bf6e0ac5b1978206b32162ab7638b47a57b8481b4ebba77b
data/.gitignore CHANGED
@@ -12,7 +12,7 @@ spec/reports
12
12
  test/tmp
13
13
  test/version_tmp
14
14
  tmp
15
-
15
+ data/mesh_data_2014/*.bin
16
16
  # YARD artifacts
17
17
  .yardoc
18
18
  _yardoc
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ #2.3.0 / 2014-09-30
2
+ * [FEATURE] Significant performance improvements to entity recognition in text
3
+
1
4
  #2.2.1 / 2014-07-18
2
5
  * [FEATURE] Headings now have forward references
3
6
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- mesh-medical-subject-headings (2.2.1)
4
+ mesh-medical-subject-headings (2.3.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'MESH'
4
+
5
+ class Numeric
6
+ def duration
7
+ secs, millisecs = self.divmod 1
8
+ # secs = self.to_int
9
+ mins = secs / 60
10
+ hours = mins / 60
11
+ days = hours / 24
12
+
13
+ if days > 0
14
+ "#{days} days and #{hours % 24} hours"
15
+ elsif hours > 0
16
+ "#{hours} hours and #{mins % 60} minutes"
17
+ elsif mins > 0
18
+ "#{mins} minutes and #{secs % 60} seconds"
19
+ elsif secs >= 0
20
+ "#{(millisecs + secs).round(3)} seconds"
21
+ end
22
+ end
23
+ end
24
+
25
+ def time_this(name, &block)
26
+ print "#{name}"
27
+ STDOUT.flush
28
+ start = Time.now.to_f
29
+ result = yield
30
+ finish = Time.now.to_f
31
+ puts "\t#{(finish - start).duration}"
32
+ result
33
+ end
34
+
35
+ mesh_tree = time_this('Loading MeSH Tree') { MESH::Tree.new }
36
+ time_this('Loading en_gb translation') { mesh_tree.load_translation(:en_gb) }
37
+ time_this('Loading wikipedia') { mesh_tree.load_wikipedia }
38
+
39
+ json_str = File.new('./example.json').read
40
+ extracted = JSON.parse(json_str)
41
+
42
+ title_headings = time_this('Matching in title') { mesh_tree.match_in_text(extracted['title']) }
43
+ description_headings = time_this('Matching in description') { mesh_tree.match_in_text(extracted['description']) }
44
+ content_headings = time_this('Matching in content') { mesh_tree.match_in_text(extracted['content']) }
45
+
46
+ classifier = MESH::Classifier.new()
47
+ classification = time_this('Classifying from matches') { classifier.classify([
48
+ {weight: 10.0, matches: title_headings},
49
+ {weight: 5.0, matches: description_headings},
50
+ {weight: 1.0, matches: content_headings}
51
+ ]) }
data/bin/example.json ADDED
@@ -0,0 +1,2 @@
1
+
2
+ { "title": "S4 Heart Sound - Auscultation Review and Summary | Learntheheart.com", "description": "The S4 heart sound is reviewed including auscultation, causes and detailed discussion about the atrial gallop.", "content": "Skip to Main Content Home About Contact ECG Review ECG Basics Intro to ECG Approach to ECG Interpretation Determining HR Determining Rhythm Determining Axis P Wave PR Interval PR Segment Q Wave R Wave S Wave QRS Complex T Wave QT Interval ST Segment TP Segment ECG Reviews / Criteria ECG Quiz ECG Cases ECG Archive Cardiology Review Cardiology Topic Reviews All Cardiology Topics Atrial Fibrillation CAD - STEMI CAD - Stable Angina CAD - Unstable Angina/NSTEMI Heart Sounds Heart Murmurs Heart Failure Aortic Stenosis Aortic Regurgitation Mitral Stenosis Mitral Regurgitation USMLE Review Clinical Trials Acute Coronary Syndrome Coronary Artery Disease Coronary Intervention Congestive Heart Failure Atrial Fibrillation Arrhythmia Hypertension Lipid Disorders Cardiac Surgery Cardiology Pearls Cardiology Mnemonics Cardiology Guidelines Quizzes General Cardiology Expert Cardiology ECG Quiz Multiple Choice Questions Heart Sounds Heart Murmurs Coronary Artery Disease - Stable Angina Coronary Artery Disease - Unstable Angina/NSTEMI Coronary Artery Disease - STEMI Atrial Fibrillation Aortic Stenosis Aortic Regurgitation Mitral Stenosis Mitral Regurgitation Case Questions Coronary Artery Disease - Stable Angina Coronary Artery Disease - Unstable Angina/NSTEMI Coronary Artery Disease - STEMI Atrial Fibrillation Aortic Stenosis Aortic Regurgitation Mitral Stenosis Mitral Regurgitation Jeopardy Games ECG Jeopardy Acute Coronary Syndromes Aortic Stenosis Atrial Fibrillation Blog Home|Cardiology Review|Cardiology Topic Reviews|Heart Sounds|S4 Heart Sound Print Email /* */ var googletag = googletag || {}; googletag.cmd = googletag.cmd || []; (function() { var gads = document.createElement('script'); gads.async = true; gads.type = 'text/javascript'; var useSSL = 'https:' == document.location.protocol; gads.src = (useSSL ? 'https:' : 'http:') + '//www.googletagservices.com/tag/js/gpt.js'; var node = document.getElementsByTagName('script')[0]; node.parentNode.insertBefore(gads, node); })(); googletag.cmd.push(function() { googletag.defineSlot('/45868137/Heart_Sounds_Review', [160, 600], 'div-gpt-ad-1373795768965-0').addService(googletag.pubads()); googletag.pubads().enableSingleRequest(); googletag.enableServices(); }); googletag.cmd.push(function() { googletag.display('div-gpt-ad-1373795768965-0'); }); S4 Heart Sound The fourth heart sound (S4), also known as the \"atrial gallop\", occurs just before S1 when the atria contract to force blood into the LV. If the LV is non-compliant and atrial contraction forces blood through the AV valves, an S4 is produced by the blood striking the LV. CLINICAL PEARL: A S4 heart sound occurs during active LV filling when atrial contraction forces blood into a non-compliant LV. Therefore any condition that creates a non-compliant LV will produce a S4, while any condition that creates an overly compliant LV will produce a S3 (as described above). A S4 heart sound can be an important sign of diastolic heart failure or active ischemia and is rarely a normal finding. Diastolic heart failure frequently results from severe left ventricular hypertrophy (LVH) resulting in impaired relaxation (compliance) of the LV. In this setting, a S4 is often heard. Also, if a person is actively having myocardial ischemia, adequate ATP can't be synthesized to allow for the release of myosin from actin, thus the myocardium is not able to relax and a S4 will be present. CLINICAL PEARL: A S4 heart sound is often a sign of diastolic heart failure and it is rarely a normal finding (unlike a S3). Normal LV Dilated LV - S3 Present CLINICAL PEARL: If the patient is in atrial fibrillation, the atria are not contracting and it is impossible to have a S4 heart sound. Like S3, the S4 sound is low pitched and best heard at the apex with the patient in the left lateral decubitus position. Comparing the 3rd and 4th heart sounds S3 - \"ventricular gallop\" S4 - \"atrial gallop\" Occurs in early diastole Occurs during passive LV filling May be normal at times Requires a very compliant LV Can be a sign of systolic CHF Occurs in late diastole Occurs during active LV filling Almost always abnormal Requires a non-compliant LV Can be a sign of diastolic CHF PROCEED TO THE EXTRA HEART SOUND REVIEW Related Links: Introduction to Heart Sounds S1 Heart Sound S2 Heart Sound S3 Heart Sound S4 Heart Sound Extra Heart Sounds Heart Sounds Multiple Choice Questions googletag.cmd.push(function() { googletag.display('div-gpt-ad-1375011221937-1'); }); googletag.cmd.push(function() { googletag.display('div-gpt-ad-1375179175725-0'); }); Site Map Advertise Disclaimer Contact Us !function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs'); © 2014 LearntheHeart.com. All rights reserved. Cardiology and ECG Blog Sign-Up E-mail * close (X) /* */", "url": "http://www.learntheheart.com/cardiology-review/s4-heart-sound/" }
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'MESH'
4
+ require 'ruby-prof'
5
+
6
+ class Numeric
7
+ def duration
8
+ secs, millisecs = self.divmod 1
9
+ # secs = self.to_int
10
+ mins = secs / 60
11
+ hours = mins / 60
12
+ days = hours / 24
13
+
14
+ if days > 0
15
+ "#{days} days and #{hours % 24} hours"
16
+ elsif hours > 0
17
+ "#{hours} hours and #{mins % 60} minutes"
18
+ elsif mins > 0
19
+ "#{mins} minutes and #{secs % 60} seconds"
20
+ elsif secs >= 0
21
+ "#{(millisecs + secs).round(3)} seconds"
22
+ end
23
+ end
24
+ end
25
+
26
+ def time_this(name, &block)
27
+ # print "#{name}"
28
+ # STDOUT.flush
29
+ start = Time.now.to_f
30
+ result = yield
31
+ finish = Time.now.to_f
32
+ # puts "\t#{(finish - start).duration}"
33
+ result
34
+ end
35
+
36
+ mesh_tree = time_this('Loading MeSH Tree') { MESH::Tree.new }
37
+ time_this('Loading en_gb translation') { mesh_tree.load_translation(:en_gb) }
38
+ time_this('Loading wikipedia') { mesh_tree.load_wikipedia }
39
+
40
+ json_str = File.new('./example.json').read
41
+ extracted = JSON.parse(json_str)
42
+
43
+ result = RubyProf.profile do
44
+
45
+ title_headings = time_this('Matching in title') { mesh_tree.match_in_text(extracted['title']) }
46
+ # description_headings = time_this('Matching in description') { mesh_tree.match_in_text(extracted['description']) }
47
+ # content_headings = time_this('Matching in content') { mesh_tree.match_in_text(extracted['content']) }
48
+
49
+ # classifier = MESH::Classifier.new()
50
+ # classification = time_this('Classifying from matches') { classifier.classify([
51
+ # {weight: 10.0, matches: title_headings},
52
+ # {weight: 5.0, matches: description_headings},
53
+ # {weight: 1.0, matches: content_headings}
54
+ # ]) }
55
+ end
56
+
57
+ printer = RubyProf::GraphHtmlPrinter.new(result)
58
+ printer.print(STDOUT)
data/lib/MESH.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'json'
2
+ require 'set'
2
3
  require 'zlib'
3
4
  require 'MESH/version'
4
5
  require 'MESH/tree'
data/lib/MESH/tree.rb CHANGED
@@ -11,6 +11,7 @@ module MESH
11
11
  @by_tree_number = {}
12
12
  @by_original_heading = {}
13
13
  @by_entry = {}
14
+ @by_entry_word = Hash.new { |h, k| h[k] = Set.new }
14
15
  @locales = [@@default_locale]
15
16
 
16
17
  filename = File.expand_path('../../../data/mesh_data_2014/d2014.bin.gz', __FILE__)
@@ -42,6 +43,7 @@ module MESH
42
43
  @headings << mh
43
44
  @by_unique_id[mh.unique_id] = mh
44
45
  @by_original_heading[mh.original_heading] = mh
46
+ add_heading_by_entry_word(mh, mh.original_heading)
45
47
  mh.tree_numbers.each do |tree_number|
46
48
  raise if @by_tree_number[tree_number]
47
49
  @by_tree_number[tree_number] = mh
@@ -50,6 +52,14 @@ module MESH
50
52
  match_headings.each do |entry|
51
53
  raise if @by_entry[entry]
52
54
  @by_entry[entry] = mh
55
+ add_heading_by_entry_word(mh, entry)
56
+ end
57
+ end
58
+
59
+ def add_heading_by_entry_word(mh, entry)
60
+ entry.split.each do |word|
61
+ word.downcase!
62
+ @by_entry_word[word] << mh
53
63
  end
54
64
  end
55
65
 
@@ -80,7 +90,10 @@ module MESH
80
90
  heading.set_original_heading(original_heading, locale) unless original_heading.nil?
81
91
  heading.set_natural_language_name(natural_language_name, locale) unless natural_language_name.nil?
82
92
  heading.set_summary(summary, locale) unless summary.nil?
83
- entries.each { |entry| heading.entries(locale) << entry }
93
+ entries.each do |entry|
94
+ heading.entries(locale) << entry
95
+ add_heading_by_entry_word(heading, entry)
96
+ end
84
97
  end
85
98
 
86
99
  entries = []
@@ -187,6 +200,10 @@ module MESH
187
200
  return @by_entry[entry_match_key(entry)]
188
201
  end
189
202
 
203
+ def find_by_entry_word(word)
204
+ return @by_entry_word[word]
205
+ end
206
+
190
207
  def where(conditions)
191
208
  matches = []
192
209
  @headings.each do |heading|
@@ -204,8 +221,12 @@ module MESH
204
221
  def match_in_text(text)
205
222
  return [] if text.nil?
206
223
  downcased = text.downcase
224
+ candidate_headings = Set.new
225
+ downcased.split(/\W+/).uniq.each do |word|
226
+ candidate_headings.merge(find_by_entry_word(word))
227
+ end
207
228
  matches = []
208
- @headings.each do |heading|
229
+ candidate_headings.each do |heading|
209
230
  next unless heading.useful
210
231
  @locales.each do |locale|
211
232
  heading.entries(locale).each do |entry|
@@ -216,7 +237,8 @@ module MESH
216
237
  regex = /(^|\W)#{Regexp.quote(entry)}(\W|$)/i
217
238
  end
218
239
  text.to_enum(:scan, regex).map do |m,|
219
- matches << {heading: heading, matched: entry, index: $`.size}
240
+ match = Regexp.last_match
241
+ matches << {heading: heading, matched: entry, index: match.offset(0)}
220
242
  end
221
243
  end
222
244
  end
@@ -224,10 +246,10 @@ module MESH
224
246
  end
225
247
  confirmed_matches = []
226
248
  matches.combination(2) do |l, r|
227
- if (r[:index] >= l[:index]) && (r[:index] + r[:matched].length <= l[:index] + l[:matched].length)
249
+ if (r[:index][0] >= l[:index][0]) && (r[:index][1] <= l[:index][1])
228
250
  #r is within l
229
251
  r[:delete] = true
230
- elsif (l[:index] >= r[:index]) && (l[:index] + l[:matched].length <= r[:index] + r[:matched].length)
252
+ elsif (l[:index][0] >= r[:index][0]) && (l[:index][1] <= r[:index][1])
231
253
  #l is within r
232
254
  l[:delete] = true
233
255
  end
data/lib/MESH/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Mesh
2
- VERSION = '2.2.1'
2
+ VERSION = '2.3.0'
3
3
  end
data/test/tree_test.rb CHANGED
@@ -43,19 +43,19 @@ module MESH
43
43
  def test_find_by_entry
44
44
 
45
45
  expected_entries = [
46
- 'Adult Reye Syndrome',
47
- 'Adult Reye\'s Syndrome',
48
- 'Fatty Liver with Encephalopathy',
49
- 'Reye Johnson Syndrome',
50
- 'Reye Like Syndrome',
51
- 'Reye Syndrome',
52
- 'Reye Syndrome, Adult',
53
- 'Reye\'s Like Syndrome',
54
- 'Reye\'s Syndrome',
55
- 'Reye\'s Syndrome, Adult',
56
- 'Reye\'s-Like Syndrome',
57
- 'Reye-Johnson Syndrome',
58
- 'Reye-Like Syndrome'
46
+ 'Adult Reye Syndrome',
47
+ 'Adult Reye\'s Syndrome',
48
+ 'Fatty Liver with Encephalopathy',
49
+ 'Reye Johnson Syndrome',
50
+ 'Reye Like Syndrome',
51
+ 'Reye Syndrome',
52
+ 'Reye Syndrome, Adult',
53
+ 'Reye\'s Like Syndrome',
54
+ 'Reye\'s Syndrome',
55
+ 'Reye\'s Syndrome, Adult',
56
+ 'Reye\'s-Like Syndrome',
57
+ 'Reye-Johnson Syndrome',
58
+ 'Reye-Like Syndrome'
59
59
  ]
60
60
 
61
61
  entries_to_test = expected_entries.flat_map do |e|
@@ -73,6 +73,28 @@ module MESH
73
73
  assert_nil @mesh_tree.find_by_entry('foo')
74
74
  end
75
75
 
76
+ def test_find_by_entry_word
77
+ expected_ids = %w(D000003)
78
+ actual = @mesh_tree.find_by_entry_word('abattoir')
79
+ actual_ids = actual.map { |mh| mh.unique_id }
80
+ assert_equal expected_ids, actual_ids, 'Should return all headings with this word in any entry'
81
+ end
82
+
83
+ def test_find_by_entry_word_case_insensitive
84
+ skip 'find by word does not support case insensitive searches'
85
+ # expected_ids = %w(D000003)
86
+ # actual = @mesh_tree.find_by_entry_word('AbaTToir')
87
+ # actual_ids = actual.map { |mh| mh.unique_id }
88
+ # assert_equal expected_ids, actual_ids, 'Should return all headings with this word in any entry'
89
+ end
90
+
91
+ def test_find_by_anglicised_entry_word
92
+ expected_ids = %w(D001471 D004938 D004947 D015154)
93
+ actual = @mesh_tree.find_by_entry_word('oesophagus')
94
+ actual_ids = actual.map { |mh| mh.unique_id }
95
+ assert_equal expected_ids, actual_ids, 'Should return all headings with this word in any entry'
96
+ end
97
+
76
98
  def test_linkifies_all_summaries
77
99
  mesh = MESH::Tree.new
78
100
  mesh.linkify_summaries do |text, heading|
@@ -107,7 +129,7 @@ module MESH
107
129
  expected = expected_ids.map { |id| @mesh_tree.find(id) }
108
130
  matches = @mesh_tree.match_in_text(@example_text)
109
131
  actual = matches.map { |match| match[:heading] }.uniq
110
- assert_equal expected, actual
132
+ assert_equal expected.sort, actual.sort
111
133
  ensure
112
134
  not_useful_ids.each { |id| @mesh_tree.find(id).useful = true }
113
135
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mesh-medical-subject-headings
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.1
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rob Styles
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-18 00:00:00.000000000 Z
11
+ date: 2014-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -125,8 +125,11 @@ files:
125
125
  - MESH.gemspec
126
126
  - README.md
127
127
  - Rakefile
128
+ - bin/benchmark_match_in_text
129
+ - bin/example.json
128
130
  - bin/extract_wikipedia_abstracts
129
131
  - bin/match_wikipedia
132
+ - bin/profile_in_text
130
133
  - bin/translate
131
134
  - data/mesh_data_2014/c2014.bin.gz
132
135
  - data/mesh_data_2014/d2014.bin.gz