rbbt-text 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/document.rb +46 -0
  3. data/lib/rbbt/document/annotation.rb +42 -0
  4. data/lib/rbbt/document/corpus.rb +38 -0
  5. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  6. data/lib/rbbt/ner/NER.rb +3 -3
  7. data/lib/rbbt/ner/abner.rb +1 -1
  8. data/lib/rbbt/ner/banner.rb +1 -1
  9. data/lib/rbbt/ner/brat.rb +1 -1
  10. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  11. data/lib/rbbt/ner/g_norm_plus.rb +19 -2
  12. data/lib/rbbt/ner/linnaeus.rb +3 -3
  13. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  14. data/lib/rbbt/ner/oscar3.rb +1 -2
  15. data/lib/rbbt/ner/oscar4.rb +3 -3
  16. data/lib/rbbt/ner/patterns.rb +6 -5
  17. data/lib/rbbt/ner/regexpNER.rb +1 -2
  18. data/lib/rbbt/ner/token_trieNER.rb +6 -6
  19. data/lib/rbbt/nlp/genia/sentence_splitter.rb +1 -1
  20. data/lib/rbbt/nlp/nlp.rb +5 -5
  21. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  22. data/lib/rbbt/segment.rb +177 -0
  23. data/lib/rbbt/segment/annotation.rb +58 -0
  24. data/lib/rbbt/segment/encoding.rb +18 -0
  25. data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -11
  26. data/lib/rbbt/segment/overlaps.rb +63 -0
  27. data/lib/rbbt/segment/range_index.rb +35 -0
  28. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  29. data/lib/rbbt/segment/token.rb +23 -0
  30. data/lib/rbbt/{text/segment → segment}/transformed.rb +7 -9
  31. data/lib/rbbt/segment/tsv.rb +41 -0
  32. data/share/install/software/Linnaeus +1 -1
  33. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  34. data/test/rbbt/document/test_annotation.rb +140 -0
  35. data/test/rbbt/document/test_corpus.rb +33 -0
  36. data/test/rbbt/ner/test_finder.rb +3 -3
  37. data/test/rbbt/ner/test_g_norm_plus.rb +11 -1
  38. data/test/rbbt/ner/test_patterns.rb +9 -9
  39. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  40. data/test/rbbt/ner/test_rnorm.rb +3 -4
  41. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  42. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +13 -3
  43. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  44. data/test/rbbt/segment/test_annotation.rb +40 -0
  45. data/test/rbbt/segment/test_corpus.rb +36 -0
  46. data/test/rbbt/segment/test_encoding.rb +24 -0
  47. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +12 -9
  48. data/test/rbbt/segment/test_overlaps.rb +69 -0
  49. data/test/rbbt/segment/test_range_index.rb +43 -0
  50. data/test/rbbt/{text/segment → segment}/test_transformed.rb +76 -51
  51. data/test/rbbt/test_document.rb +14 -0
  52. data/test/rbbt/test_segment.rb +187 -0
  53. data/test/test_helper.rb +5 -3
  54. metadata +40 -32
  55. data/lib/rbbt/text/corpus.rb +0 -106
  56. data/lib/rbbt/text/corpus/document.rb +0 -383
  57. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  58. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  59. data/lib/rbbt/text/document.rb +0 -39
  60. data/lib/rbbt/text/segment.rb +0 -363
  61. data/lib/rbbt/text/segment/docid.rb +0 -46
  62. data/lib/rbbt/text/segment/relationship.rb +0 -24
  63. data/lib/rbbt/text/segment/token.rb +0 -49
  64. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  65. data/test/rbbt/text/corpus/test_document.rb +0 -82
  66. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  67. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  68. data/test/rbbt/text/test_corpus.rb +0 -34
  69. data/test/rbbt/text/test_document.rb +0 -58
  70. data/test/rbbt/text/test_segment.rb +0 -100
@@ -23,9 +23,9 @@ class TestRegExpNER < Test::Unit::TestCase
23
23
  matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)
24
24
 
25
25
  assert_equal ["this", "this", "that"].sort, matches.sort
26
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
27
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
28
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
26
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
27
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
28
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
29
29
  end
30
30
 
31
31
  def test_define_regexps
@@ -39,9 +39,9 @@ class TestRegExpNER < Test::Unit::TestCase
39
39
 
40
40
  matches = ner.entities(sentence)
41
41
  assert_equal ["this", "this", "that"].sort, matches.sort
42
- assert_equal "In ".length, matches.select{|m| m.type == :this }[0].offset
43
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this }[1].offset
44
- assert_equal :this, matches.select{|m| m.type == :this }[0].type
42
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this }[0].offset
43
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this }[1].offset
44
+ assert_equal :this, matches.select{|m| m.entity_type == :this }[0].entity_type
45
45
  end
46
46
 
47
47
 
@@ -51,9 +51,9 @@ class TestRegExpNER < Test::Unit::TestCase
51
51
  ner = RegExpNER.new({:this => /this/, :that => /that/})
52
52
  matches = ner.entities(sentence)
53
53
  assert_equal ["this", "this", "that"].sort, matches.sort
54
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
55
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
56
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
54
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
55
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
56
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
57
57
 
58
58
  Segmented.setup(sentence)
59
59
  ner_this = RegExpNER.new({:this => /this/})
@@ -64,9 +64,9 @@ class TestRegExpNER < Test::Unit::TestCase
64
64
  matches = sentence.segments
65
65
 
66
66
  assert_equal ["this", "this", "that"].sort, matches.sort
67
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
68
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
69
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
67
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
68
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
69
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
70
70
  end
71
71
 
72
72
  def test_entities_captures
@@ -75,8 +75,8 @@ class TestRegExpNER < Test::Unit::TestCase
75
75
  ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
76
76
  matches = ner.entities(sentence)
77
77
  assert_equal ["this", "this", "that", "should"].sort, matches.sort
78
- assert_equal "In this sentence I ".length, matches.select{|m| m.type == :should}[0].offset
79
- assert_equal :should, matches.select{|m| m.type == :should}[0].type
78
+ assert_equal "In this sentence I ".length, matches.select{|m| m.entity_type == :should}[0].offset
79
+ assert_equal :should, matches.select{|m| m.entity_type == :should}[0].entity_type
80
80
  end
81
81
 
82
82
 
@@ -27,10 +27,9 @@ S000000376 AAA GENE1 DDD
27
27
  assert_equal(["S000000029"], @norm.match("FUN21"))
28
28
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
29
29
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
30
- assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
31
- assert_equal([], @norm.match("GER4"))
32
-
33
- @norm.match("FUN21")
30
+ assert_equal(["S000000029"].sort, @norm.match("FUN 21").sort)
31
+ assert_equal([], @norm.match("Non-sense"))
32
+ assert_equal(["S000000029", "S000000374"], @norm.match("GER4"))
34
33
  end
35
34
 
36
35
  def test_select
@@ -74,6 +74,7 @@ C2;11;22;3 3;bb
74
74
  index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
75
75
 
76
76
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
+ assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
78
  end
78
79
  end
79
80
 
@@ -1,9 +1,19 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/genia/sentence_splitter'
3
3
 
4
- class TestClass < Test::Unit::TestCase
5
- def test_true
6
- assert true
4
+ class TestNLP < Test::Unit::TestCase
5
+ def test_sentences
6
+ text =<<-EOF
7
+ This is a sentence.
8
+ A funky character ™ in a sentence.
9
+ This is a sentence.
10
+ This is a
11
+ sentence. This is
12
+ another sentence.
13
+ EOF
14
+
15
+ assert_equal "This is a \nsentence.", NLP.geniass_sentence_splitter(text)[3]
7
16
  end
17
+
8
18
  end
9
19
 
@@ -1,6 +1,6 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/open_nlp/sentence_splitter'
3
- require 'rbbt/ner/segment'
3
+ require 'rbbt/segment'
4
4
 
5
5
  $text=<<-EOF
6
6
  Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
@@ -22,6 +22,22 @@ class TestClass < Test::Unit::TestCase
22
22
  def test_sentences
23
23
  text =<<-EOF
24
24
  This is a sentence.
25
+ No funky character in this sentence.
26
+ This is a sentence.
27
+ This is a
28
+ sentence. This is
29
+ another sentence.
30
+ EOF
31
+
32
+ assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
33
+
34
+ assert_equal 5, OpenNLP.sentence_splitter(text).length
35
+ assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
+ end
37
+
38
+ def test_sentences_fix_utf8
39
+ text =<<-EOF
40
+ This is a sentence.
25
41
  A funky character ™ in a sentence.
26
42
  This is a sentence.
27
43
  This is a
@@ -35,12 +51,12 @@ another sentence.
35
51
  assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
52
  end
37
53
 
38
- def _test_text_sentences
54
+ def test_text_sentences
39
55
  Misc.benchmark(100) do
40
- OpenNLP.sentence_splitter($text).include? "Our
56
+ assert OpenNLP.sentence_splitter($text).include?("Our
41
57
  findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
42
58
  AT/RT and the usefulness of antibodies directed against SMARCA4 in this
43
- diagnostic setting."
59
+ diagnostic setting.")
44
60
  end
45
61
  end
46
62
  end
@@ -0,0 +1,40 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/annotation'
6
+
7
+ class TestAnnotation < Test::Unit::TestCase
8
+ def test_annotation
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
13
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
14
+
15
+ assert_equal 'verb', annotation.annotid.split(":").last
16
+
17
+ annotation = SegmentAnnotation.setup(segment.segid, :type => :verb)
18
+ assert_equal 'verb', annotation.annotid.split(":").last
19
+ end
20
+
21
+ def test_annotid
22
+ text = "This is a document"
23
+ Document.setup(text, "TEST", "test_doc1", nil)
24
+
25
+ corpus = {}
26
+ corpus.extend Document::Corpus
27
+
28
+ corpus.add_document(text)
29
+
30
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
31
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
32
+
33
+ annotid = annotation.annotid(corpus)
34
+
35
+ assert_equal 'verb', annotid.type
36
+ assert_equal 'verb', annotid.annotation.type
37
+ assert_equal 'is', annotid.annotation
38
+ end
39
+ end
40
+
@@ -0,0 +1,36 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/corpus'
6
+
7
+ class TestSegmentCorpus < Test::Unit::TestCase
8
+ def test_corpus
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = {}
13
+ corpus.extend Document::Corpus
14
+
15
+ corpus.add_document(text)
16
+
17
+ docid = text.docid(corpus)
18
+
19
+ assert_equal docid.document, text
20
+ end
21
+
22
+ def test_find
23
+ text = "This is a document"
24
+ Document.setup(text, "TEST", "test_doc1", nil)
25
+
26
+ TmpFile.with_file do |path|
27
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
28
+ corpus.extend Document::Corpus
29
+
30
+ corpus.add_document(text)
31
+
32
+ assert corpus.prefix("TEST:").include?(text.docid)
33
+ end
34
+ end
35
+ end
36
+
@@ -0,0 +1,24 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/encoding'
3
+
4
+ class TestEncoding < Test::Unit::TestCase
5
+ def _test_bad_chars
6
+ text = "A funky character ™ in a sentence."
7
+
8
+ assert_equal ["™"], Segment.bad_chars(text)
9
+ end
10
+
11
+ def test_ascii
12
+ text = "A funky character ™ in a sentence."
13
+
14
+ Segment.ascii(text) do
15
+ assert_equal "A funky character ? in a sentence.", text
16
+ end
17
+
18
+ Segment.ascii(text, "NONASCII") do
19
+ assert_equal "A funky character NONASCII in a sentence.", text
20
+ end
21
+
22
+ assert_equal "A funky character ™ in a sentence.", text
23
+ end
24
+ end
@@ -1,6 +1,6 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_info
@@ -15,11 +15,14 @@ class TestClass < Test::Unit::TestCase
15
15
 
16
16
  def test_all_args
17
17
  a = "test"
18
- NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
18
+ NamedEntity.setup a, 10, "TEST:doc1:test_type:hash", "NamedEntity", "TYPE", "CODE", "SCORE"
19
19
  assert_equal 10, a.offset
20
+ assert_equal "NamedEntity", a.type
21
+ assert_equal "TYPE", a.entity_type
22
+ assert_equal "SCORE", a.score
20
23
  end
21
24
 
22
- def test_tsv
25
+ def __test_tsv
23
26
  a = "test"
24
27
  NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
25
28
  assert Segment.tsv([a]).fields.include? "code"
@@ -27,23 +30,23 @@ class TestClass < Test::Unit::TestCase
27
30
  assert Segment.tsv([a], "literal").fields.include? "code"
28
31
  end
29
32
 
30
- def test_segment_brat
33
+ def __test_segment_brat
31
34
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
32
35
 
33
36
  gene1 = "TP53"
34
37
  gene1.extend NamedEntity
35
38
  gene1.offset = a.index gene1
36
- gene1.type = "Gene"
39
+ gene1.entity_type = "Gene"
37
40
 
38
41
  gene2 = "CDK5R1"
39
42
  gene2.extend NamedEntity
40
43
  gene2.offset = a.index gene2
41
- gene2.type = "Gene"
44
+ gene2.entity_type = "Gene"
42
45
 
43
46
  gene3 = "TP53 gene"
44
47
  gene3.extend NamedEntity
45
48
  gene3.offset = a.index gene3
46
- gene3.type = "Gene"
49
+ gene3.entity_type = "Gene"
47
50
 
48
51
  segments = [gene1, gene2, gene3]
49
52
  assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"
@@ -0,0 +1,69 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/overlaps'
4
+
5
+ class TestOverlaps < Test::Unit::TestCase
6
+ def setup
7
+ @text = <<-EOF
8
+ This is a first sentence. More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
9
+ EOF
10
+
11
+ @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
12
+ Segment.setup(literal, :offset => @text.index(literal))
13
+ end
14
+
15
+ @sentences = @text.partition(".").values_at(0, 2).collect do |sentence|
16
+ Segment.setup sentence, :offset => @text.index(sentence)
17
+ end
18
+ end
19
+
20
+ def test_make_relative
21
+ sentence = @sentences[1]
22
+
23
+ @entities.each do |e|
24
+ assert_equal e, @text[e.range]
25
+ end
26
+
27
+ sentence.make_relative @entities do
28
+ @entities.each do |e|
29
+ assert_equal e, sentence[e.range]
30
+ end
31
+
32
+ @entities.each do |e|
33
+ assert_not_equal e, @text[e.range]
34
+ end
35
+ end
36
+
37
+ @entities.each do |e|
38
+ assert_equal e, @text[e.range]
39
+ end
40
+ end
41
+
42
+ def test_range_in
43
+ sentence = @sentences[1]
44
+
45
+ @entities.each do |e|
46
+ assert_equal e.range_in(sentence).begin, sentence.index(e)
47
+ assert_equal e.range.begin - sentence.offset, sentence.index(e)
48
+ end
49
+ end
50
+
51
+ def test_includes
52
+ @entities.each do |e|
53
+ assert ! @sentences[0].include?(e)
54
+ assert @sentences[1].include?(e)
55
+ assert ! e.include?(@sentences[0])
56
+ assert ! e.include?(@sentences[1])
57
+ end
58
+ end
59
+
60
+ def test_overlaps?
61
+ @entities.each do |e|
62
+ assert ! @sentences[0].overlaps?(e)
63
+ assert @sentences[1].overlaps?(e)
64
+ assert ! e.overlaps?(@sentences[0])
65
+ assert e.overlaps?(@sentences[1])
66
+ end
67
+ end
68
+
69
+ end
@@ -0,0 +1,43 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/range_index'
6
+
7
+ class TestRangeIndex < Test::Unit::TestCase
8
+ def test_segment_index
9
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = {}
13
+ corpus.extend Document::Corpus
14
+
15
+ corpus.add_document(text)
16
+
17
+ gene1 = "TP53"
18
+ gene1.extend Segment
19
+ gene1.offset = text.index gene1
20
+ gene1.docid = text.docid
21
+
22
+ gene2 = "CDK5R1"
23
+ gene2.extend Segment
24
+ gene2.offset = text.index gene2
25
+ gene2.docid = text.docid
26
+
27
+ gene3 = "TP53 gene"
28
+ gene3.extend Segment
29
+ gene3.offset = text.index gene1
30
+ gene3.docid = text.docid
31
+
32
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus)
33
+ assert_equal "CDK5R1", index[gene2.offset + 1].segment.first
34
+
35
+ TmpFile.with_file do |fwt|
36
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
37
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
38
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
39
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
40
+ end
41
+ end
42
+ end
43
+
@@ -1,10 +1,21 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment/transformed'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/transformed'
3
+ require 'rbbt/segment/named_entity'
4
4
  require 'rexml/document'
5
5
 
6
- class TestClass < Test::Unit::TestCase
7
- def test_sort
6
+ class TestTransformed < Test::Unit::TestCase
7
+
8
+ def setup
9
+ @text = <<-EOF
10
+ More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
11
+ EOF
12
+
13
+ @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
14
+ NamedEntity.setup(literal, :offset => @text.index(literal))
15
+ end
16
+ end
17
+
18
+ def test_transform
8
19
  text = <<-EOF
9
20
  More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
10
21
  EOF
@@ -13,52 +24,25 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
13
24
  NamedEntity.setup(literal, :offset => text.index(literal))
14
25
  end
15
26
 
16
- Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
17
- assert text.include? "such as [IL-2]"
18
- end
27
+ Transformed.transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" })
28
+ assert text.include? "such as [IL-2]"
19
29
  end
20
30
 
21
- def ___test_transform
22
- a = "This sentence mentions the TP53 gene and the CDK5 protein"
23
- original = a.dup
24
-
25
- gene1 = "TP53"
26
- gene1.extend Segment
27
- gene1.offset = a.index gene1
28
-
29
- gene2 = "CDK5"
30
- gene2.extend Segment
31
- gene2.offset = a.index gene2
32
-
33
- assert_equal gene1, a[gene1.range]
34
- assert_equal gene2, a[gene2.range]
35
-
36
- c = a.dup
37
-
38
- c[gene2.range] = "GN"
39
- assert_equal c, Transformed.transform(a,[gene2], "GN")
40
- c[gene1.range] = "GN"
41
- assert_equal c, Transformed.transform(a,[gene1], "GN")
42
-
43
- iii a.transformation_offset_differences
44
- raise
45
- assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
46
- assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
47
-
48
-
49
- gene3 = "GN gene"
50
- gene3.extend Segment
51
- gene3.offset = a.index gene3
52
-
53
- assert_equal gene3, a[gene3.range]
31
+ def test_with_transform
32
+ text = <<-EOF
33
+ More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
34
+ EOF
54
35
 
55
- a.restore([gene3])
56
- assert_equal original, a
57
- assert_equal "TP53 gene", a[gene3.range]
36
+ entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].reverse.collect do |literal|
37
+ NamedEntity.setup(literal, :offset => text.index(literal))
38
+ end
58
39
 
40
+ Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
41
+ assert text.include? "such as [IL-2]"
42
+ end
59
43
  end
60
44
 
61
- def test_with_transform
45
+ def test_with_transform_2
62
46
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
63
47
  original = a.dup
64
48
 
@@ -123,12 +107,12 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
123
107
  gene1 = "TP53"
124
108
  gene1.extend NamedEntity
125
109
  gene1.offset = a.index gene1
126
- gene1.type = "Gene"
110
+ gene1.entity_type = "Gene"
127
111
 
128
112
  gene2 = "CDK5R1"
129
113
  gene2.extend NamedEntity
130
114
  gene2.offset = a.index gene2
131
- gene2.type = "Protein"
115
+ gene2.entity_type = "Protein"
132
116
 
133
117
  Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
134
118
  assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -143,13 +127,13 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
143
127
  gene1.extend NamedEntity
144
128
  gene1.offset = a.index gene1
145
129
  gene1.offset += 10
146
- gene1.type = "Gene"
130
+ gene1.entity_type = "Gene"
147
131
 
148
132
  gene2 = "CDK5R1"
149
133
  gene2.extend NamedEntity
150
134
  gene2.offset = a.index gene2
151
135
  gene2.offset += 10
152
- gene2.type = "Protein"
136
+ gene2.entity_type = "Protein"
153
137
 
154
138
  Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
155
139
  assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -162,12 +146,12 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
162
146
  gene1 = "TP53"
163
147
  gene1.extend NamedEntity
164
148
  gene1.offset = a.index gene1
165
- gene1.type = "Gene"
149
+ gene1.entity_type = "Gene"
166
150
 
167
151
  gene2 = "TP53 gene"
168
152
  gene2.extend NamedEntity
169
153
  gene2.offset = a.index gene2
170
- gene2.type = "Expanded Gene"
154
+ gene2.entity_type = "Expanded Gene"
171
155
 
172
156
  assert_equal [gene1], Segment.overlaps(Segment.sort([gene1,gene2]))
173
157
 
@@ -379,5 +363,46 @@ This is another sentence. Among the nonstructural proteins, the leader protein (
379
363
  end
380
364
  end
381
365
  end
366
+
367
+ def ___test_transform
368
+ a = "This sentence mentions the TP53 gene and the CDK5 protein"
369
+ original = a.dup
370
+
371
+ gene1 = "TP53"
372
+ gene1.extend Segment
373
+ gene1.offset = a.index gene1
374
+
375
+ gene2 = "CDK5"
376
+ gene2.extend Segment
377
+ gene2.offset = a.index gene2
378
+
379
+ assert_equal gene1, a[gene1.range]
380
+ assert_equal gene2, a[gene2.range]
381
+
382
+ c = a.dup
383
+
384
+ c[gene2.range] = "GN"
385
+ assert_equal c, Transformed.transform(a,[gene2], "GN")
386
+ c[gene1.range] = "GN"
387
+ assert_equal c, Transformed.transform(a,[gene1], "GN")
388
+
389
+ iii a.transformation_offset_differences
390
+ raise
391
+ assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
392
+ assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
393
+
394
+
395
+ gene3 = "GN gene"
396
+ gene3.extend Segment
397
+ gene3.offset = a.index gene3
398
+
399
+ assert_equal gene3, a[gene3.range]
400
+
401
+ a.restore([gene3])
402
+ assert_equal original, a
403
+ assert_equal "TP53 gene", a[gene3.range]
404
+
405
+ end
406
+
382
407
  end
383
408