rbbt-text 1.2.0 → 1.3.0

This diff shows the changes between publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/document.rb +46 -0
  3. data/lib/rbbt/document/annotation.rb +42 -0
  4. data/lib/rbbt/document/corpus.rb +38 -0
  5. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  6. data/lib/rbbt/ner/NER.rb +3 -3
  7. data/lib/rbbt/ner/abner.rb +1 -1
  8. data/lib/rbbt/ner/banner.rb +1 -1
  9. data/lib/rbbt/ner/brat.rb +1 -1
  10. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  11. data/lib/rbbt/ner/g_norm_plus.rb +19 -2
  12. data/lib/rbbt/ner/linnaeus.rb +3 -3
  13. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  14. data/lib/rbbt/ner/oscar3.rb +1 -2
  15. data/lib/rbbt/ner/oscar4.rb +3 -3
  16. data/lib/rbbt/ner/patterns.rb +6 -5
  17. data/lib/rbbt/ner/regexpNER.rb +1 -2
  18. data/lib/rbbt/ner/token_trieNER.rb +6 -6
  19. data/lib/rbbt/nlp/genia/sentence_splitter.rb +1 -1
  20. data/lib/rbbt/nlp/nlp.rb +5 -5
  21. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  22. data/lib/rbbt/segment.rb +177 -0
  23. data/lib/rbbt/segment/annotation.rb +58 -0
  24. data/lib/rbbt/segment/encoding.rb +18 -0
  25. data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -11
  26. data/lib/rbbt/segment/overlaps.rb +63 -0
  27. data/lib/rbbt/segment/range_index.rb +35 -0
  28. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  29. data/lib/rbbt/segment/token.rb +23 -0
  30. data/lib/rbbt/{text/segment → segment}/transformed.rb +7 -9
  31. data/lib/rbbt/segment/tsv.rb +41 -0
  32. data/share/install/software/Linnaeus +1 -1
  33. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  34. data/test/rbbt/document/test_annotation.rb +140 -0
  35. data/test/rbbt/document/test_corpus.rb +33 -0
  36. data/test/rbbt/ner/test_finder.rb +3 -3
  37. data/test/rbbt/ner/test_g_norm_plus.rb +11 -1
  38. data/test/rbbt/ner/test_patterns.rb +9 -9
  39. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  40. data/test/rbbt/ner/test_rnorm.rb +3 -4
  41. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  42. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +13 -3
  43. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  44. data/test/rbbt/segment/test_annotation.rb +40 -0
  45. data/test/rbbt/segment/test_corpus.rb +36 -0
  46. data/test/rbbt/segment/test_encoding.rb +24 -0
  47. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +12 -9
  48. data/test/rbbt/segment/test_overlaps.rb +69 -0
  49. data/test/rbbt/segment/test_range_index.rb +43 -0
  50. data/test/rbbt/{text/segment → segment}/test_transformed.rb +76 -51
  51. data/test/rbbt/test_document.rb +14 -0
  52. data/test/rbbt/test_segment.rb +187 -0
  53. data/test/test_helper.rb +5 -3
  54. metadata +40 -32
  55. data/lib/rbbt/text/corpus.rb +0 -106
  56. data/lib/rbbt/text/corpus/document.rb +0 -383
  57. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  58. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  59. data/lib/rbbt/text/document.rb +0 -39
  60. data/lib/rbbt/text/segment.rb +0 -363
  61. data/lib/rbbt/text/segment/docid.rb +0 -46
  62. data/lib/rbbt/text/segment/relationship.rb +0 -24
  63. data/lib/rbbt/text/segment/token.rb +0 -49
  64. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  65. data/test/rbbt/text/corpus/test_document.rb +0 -82
  66. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  67. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  68. data/test/rbbt/text/test_corpus.rb +0 -34
  69. data/test/rbbt/text/test_document.rb +0 -58
  70. data/test/rbbt/text/test_segment.rb +0 -100
@@ -23,9 +23,9 @@ class TestRegExpNER < Test::Unit::TestCase
23
23
  matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)
24
24
 
25
25
  assert_equal ["this", "this", "that"].sort, matches.sort
26
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
27
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
28
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
26
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
27
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
28
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
29
29
  end
30
30
 
31
31
  def test_define_regexps
@@ -39,9 +39,9 @@ class TestRegExpNER < Test::Unit::TestCase
39
39
 
40
40
  matches = ner.entities(sentence)
41
41
  assert_equal ["this", "this", "that"].sort, matches.sort
42
- assert_equal "In ".length, matches.select{|m| m.type == :this }[0].offset
43
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this }[1].offset
44
- assert_equal :this, matches.select{|m| m.type == :this }[0].type
42
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this }[0].offset
43
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this }[1].offset
44
+ assert_equal :this, matches.select{|m| m.entity_type == :this }[0].entity_type
45
45
  end
46
46
 
47
47
 
@@ -51,9 +51,9 @@ class TestRegExpNER < Test::Unit::TestCase
51
51
  ner = RegExpNER.new({:this => /this/, :that => /that/})
52
52
  matches = ner.entities(sentence)
53
53
  assert_equal ["this", "this", "that"].sort, matches.sort
54
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
55
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
56
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
54
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
55
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
56
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
57
57
 
58
58
  Segmented.setup(sentence)
59
59
  ner_this = RegExpNER.new({:this => /this/})
@@ -64,9 +64,9 @@ class TestRegExpNER < Test::Unit::TestCase
64
64
  matches = sentence.segments
65
65
 
66
66
  assert_equal ["this", "this", "that"].sort, matches.sort
67
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
68
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
69
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
67
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
68
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
69
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
70
70
  end
71
71
 
72
72
  def test_entities_captures
@@ -75,8 +75,8 @@ class TestRegExpNER < Test::Unit::TestCase
75
75
  ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
76
76
  matches = ner.entities(sentence)
77
77
  assert_equal ["this", "this", "that", "should"].sort, matches.sort
78
- assert_equal "In this sentence I ".length, matches.select{|m| m.type == :should}[0].offset
79
- assert_equal :should, matches.select{|m| m.type == :should}[0].type
78
+ assert_equal "In this sentence I ".length, matches.select{|m| m.entity_type == :should}[0].offset
79
+ assert_equal :should, matches.select{|m| m.entity_type == :should}[0].entity_type
80
80
  end
81
81
 
82
82
 
@@ -27,10 +27,9 @@ S000000376 AAA GENE1 DDD
27
27
  assert_equal(["S000000029"], @norm.match("FUN21"))
28
28
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
29
29
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
30
- assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
31
- assert_equal([], @norm.match("GER4"))
32
-
33
- @norm.match("FUN21")
30
+ assert_equal(["S000000029"].sort, @norm.match("FUN 21").sort)
31
+ assert_equal([], @norm.match("Non-sense"))
32
+ assert_equal(["S000000029", "S000000374"], @norm.match("GER4"))
34
33
  end
35
34
 
36
35
  def test_select
@@ -74,6 +74,7 @@ C2;11;22;3 3;bb
74
74
  index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
75
75
 
76
76
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
+ assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
78
  end
78
79
  end
79
80
 
@@ -1,9 +1,19 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/genia/sentence_splitter'
3
3
 
4
- class TestClass < Test::Unit::TestCase
5
- def test_true
6
- assert true
4
+ class TestNLP < Test::Unit::TestCase
5
+ def test_sentences
6
+ text =<<-EOF
7
+ This is a sentence.
8
+ A funky character ™ in a sentence.
9
+ This is a sentence.
10
+ This is a
11
+ sentence. This is
12
+ another sentence.
13
+ EOF
14
+
15
+ assert_equal "This is a \nsentence.", NLP.geniass_sentence_splitter(text)[3]
7
16
  end
17
+
8
18
  end
9
19
 
@@ -1,6 +1,6 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/open_nlp/sentence_splitter'
3
- require 'rbbt/ner/segment'
3
+ require 'rbbt/segment'
4
4
 
5
5
  $text=<<-EOF
6
6
  Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
@@ -22,6 +22,22 @@ class TestClass < Test::Unit::TestCase
22
22
  def test_sentences
23
23
  text =<<-EOF
24
24
  This is a sentence.
25
+ No funky character in this sentence.
26
+ This is a sentence.
27
+ This is a
28
+ sentence. This is
29
+ another sentence.
30
+ EOF
31
+
32
+ assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
33
+
34
+ assert_equal 5, OpenNLP.sentence_splitter(text).length
35
+ assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
+ end
37
+
38
+ def test_sentences_fix_utf8
39
+ text =<<-EOF
40
+ This is a sentence.
25
41
  A funky character ™ in a sentence.
26
42
  This is a sentence.
27
43
  This is a
@@ -35,12 +51,12 @@ another sentence.
35
51
  assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
52
  end
37
53
 
38
- def _test_text_sentences
54
+ def test_text_sentences
39
55
  Misc.benchmark(100) do
40
- OpenNLP.sentence_splitter($text).include? "Our
56
+ assert OpenNLP.sentence_splitter($text).include?("Our
41
57
  findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
42
58
  AT/RT and the usefulness of antibodies directed against SMARCA4 in this
43
- diagnostic setting."
59
+ diagnostic setting.")
44
60
  end
45
61
  end
46
62
  end
@@ -0,0 +1,40 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/annotation'
6
+
7
+ class TestAnnotation < Test::Unit::TestCase
8
+ def test_annotation
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
13
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
14
+
15
+ assert_equal 'verb', annotation.annotid.split(":").last
16
+
17
+ annotation = SegmentAnnotation.setup(segment.segid, :type => :verb)
18
+ assert_equal 'verb', annotation.annotid.split(":").last
19
+ end
20
+
21
+ def test_annotid
22
+ text = "This is a document"
23
+ Document.setup(text, "TEST", "test_doc1", nil)
24
+
25
+ corpus = {}
26
+ corpus.extend Document::Corpus
27
+
28
+ corpus.add_document(text)
29
+
30
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
31
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
32
+
33
+ annotid = annotation.annotid(corpus)
34
+
35
+ assert_equal 'verb', annotid.type
36
+ assert_equal 'verb', annotid.annotation.type
37
+ assert_equal 'is', annotid.annotation
38
+ end
39
+ end
40
+
@@ -0,0 +1,36 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/corpus'
6
+
7
+ class TestSegmentCorpus < Test::Unit::TestCase
8
+ def test_corpus
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = {}
13
+ corpus.extend Document::Corpus
14
+
15
+ corpus.add_document(text)
16
+
17
+ docid = text.docid(corpus)
18
+
19
+ assert_equal docid.document, text
20
+ end
21
+
22
+ def test_find
23
+ text = "This is a document"
24
+ Document.setup(text, "TEST", "test_doc1", nil)
25
+
26
+ TmpFile.with_file do |path|
27
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
28
+ corpus.extend Document::Corpus
29
+
30
+ corpus.add_document(text)
31
+
32
+ assert corpus.prefix("TEST:").include?(text.docid)
33
+ end
34
+ end
35
+ end
36
+
@@ -0,0 +1,24 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/encoding'
3
+
4
+ class TestEncoding < Test::Unit::TestCase
5
+ def _test_bad_chars
6
+ text = "A funky character ™ in a sentence."
7
+
8
+ assert_equal ["™"], Segment.bad_chars(text)
9
+ end
10
+
11
+ def test_ascii
12
+ text = "A funky character ™ in a sentence."
13
+
14
+ Segment.ascii(text) do
15
+ assert_equal "A funky character ? in a sentence.", text
16
+ end
17
+
18
+ Segment.ascii(text, "NONASCII") do
19
+ assert_equal "A funky character NONASCII in a sentence.", text
20
+ end
21
+
22
+ assert_equal "A funky character ™ in a sentence.", text
23
+ end
24
+ end
@@ -1,6 +1,6 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_info
@@ -15,11 +15,14 @@ class TestClass < Test::Unit::TestCase
15
15
 
16
16
  def test_all_args
17
17
  a = "test"
18
- NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
18
+ NamedEntity.setup a, 10, "TEST:doc1:test_type:hash", "NamedEntity", "TYPE", "CODE", "SCORE"
19
19
  assert_equal 10, a.offset
20
+ assert_equal "NamedEntity", a.type
21
+ assert_equal "TYPE", a.entity_type
22
+ assert_equal "SCORE", a.score
20
23
  end
21
24
 
22
- def test_tsv
25
+ def __test_tsv
23
26
  a = "test"
24
27
  NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
25
28
  assert Segment.tsv([a]).fields.include? "code"
@@ -27,23 +30,23 @@ class TestClass < Test::Unit::TestCase
27
30
  assert Segment.tsv([a], "literal").fields.include? "code"
28
31
  end
29
32
 
30
- def test_segment_brat
33
+ def __test_segment_brat
31
34
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
32
35
 
33
36
  gene1 = "TP53"
34
37
  gene1.extend NamedEntity
35
38
  gene1.offset = a.index gene1
36
- gene1.type = "Gene"
39
+ gene1.entity_type = "Gene"
37
40
 
38
41
  gene2 = "CDK5R1"
39
42
  gene2.extend NamedEntity
40
43
  gene2.offset = a.index gene2
41
- gene2.type = "Gene"
44
+ gene2.entity_type = "Gene"
42
45
 
43
46
  gene3 = "TP53 gene"
44
47
  gene3.extend NamedEntity
45
48
  gene3.offset = a.index gene3
46
- gene3.type = "Gene"
49
+ gene3.entity_type = "Gene"
47
50
 
48
51
  segments = [gene1, gene2, gene3]
49
52
  assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"
@@ -0,0 +1,69 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/overlaps'
4
+
5
+ class TestOverlaps < Test::Unit::TestCase
6
+ def setup
7
+ @text = <<-EOF
8
+ This is a first sentence. More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
9
+ EOF
10
+
11
+ @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
12
+ Segment.setup(literal, :offset => @text.index(literal))
13
+ end
14
+
15
+ @sentences = @text.partition(".").values_at(0, 2).collect do |sentence|
16
+ Segment.setup sentence, :offset => @text.index(sentence)
17
+ end
18
+ end
19
+
20
+ def test_make_relative
21
+ sentence = @sentences[1]
22
+
23
+ @entities.each do |e|
24
+ assert_equal e, @text[e.range]
25
+ end
26
+
27
+ sentence.make_relative @entities do
28
+ @entities.each do |e|
29
+ assert_equal e, sentence[e.range]
30
+ end
31
+
32
+ @entities.each do |e|
33
+ assert_not_equal e, @text[e.range]
34
+ end
35
+ end
36
+
37
+ @entities.each do |e|
38
+ assert_equal e, @text[e.range]
39
+ end
40
+ end
41
+
42
+ def test_range_in
43
+ sentence = @sentences[1]
44
+
45
+ @entities.each do |e|
46
+ assert_equal e.range_in(sentence).begin, sentence.index(e)
47
+ assert_equal e.range.begin - sentence.offset, sentence.index(e)
48
+ end
49
+ end
50
+
51
+ def test_includes
52
+ @entities.each do |e|
53
+ assert ! @sentences[0].include?(e)
54
+ assert @sentences[1].include?(e)
55
+ assert ! e.include?(@sentences[0])
56
+ assert ! e.include?(@sentences[1])
57
+ end
58
+ end
59
+
60
+ def test_overlaps?
61
+ @entities.each do |e|
62
+ assert ! @sentences[0].overlaps?(e)
63
+ assert @sentences[1].overlaps?(e)
64
+ assert ! e.overlaps?(@sentences[0])
65
+ assert e.overlaps?(@sentences[1])
66
+ end
67
+ end
68
+
69
+ end
@@ -0,0 +1,43 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/range_index'
6
+
7
+ class TestRangeIndex < Test::Unit::TestCase
8
+ def test_segment_index
9
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = {}
13
+ corpus.extend Document::Corpus
14
+
15
+ corpus.add_document(text)
16
+
17
+ gene1 = "TP53"
18
+ gene1.extend Segment
19
+ gene1.offset = text.index gene1
20
+ gene1.docid = text.docid
21
+
22
+ gene2 = "CDK5R1"
23
+ gene2.extend Segment
24
+ gene2.offset = text.index gene2
25
+ gene2.docid = text.docid
26
+
27
+ gene3 = "TP53 gene"
28
+ gene3.extend Segment
29
+ gene3.offset = text.index gene1
30
+ gene3.docid = text.docid
31
+
32
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus)
33
+ assert_equal "CDK5R1", index[gene2.offset + 1].segment.first
34
+
35
+ TmpFile.with_file do |fwt|
36
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
37
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
38
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
39
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
40
+ end
41
+ end
42
+ end
43
+
@@ -1,10 +1,21 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment/transformed'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/transformed'
3
+ require 'rbbt/segment/named_entity'
4
4
  require 'rexml/document'
5
5
 
6
- class TestClass < Test::Unit::TestCase
7
- def test_sort
6
+ class TestTransformed < Test::Unit::TestCase
7
+
8
+ def setup
9
+ @text = <<-EOF
10
+ More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
11
+ EOF
12
+
13
+ @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
14
+ NamedEntity.setup(literal, :offset => @text.index(literal))
15
+ end
16
+ end
17
+
18
+ def test_transform
8
19
  text = <<-EOF
9
20
  More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
10
21
  EOF
@@ -13,52 +24,25 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
13
24
  NamedEntity.setup(literal, :offset => text.index(literal))
14
25
  end
15
26
 
16
- Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
17
- assert text.include? "such as [IL-2]"
18
- end
27
+ Transformed.transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" })
28
+ assert text.include? "such as [IL-2]"
19
29
  end
20
30
 
21
- def ___test_transform
22
- a = "This sentence mentions the TP53 gene and the CDK5 protein"
23
- original = a.dup
24
-
25
- gene1 = "TP53"
26
- gene1.extend Segment
27
- gene1.offset = a.index gene1
28
-
29
- gene2 = "CDK5"
30
- gene2.extend Segment
31
- gene2.offset = a.index gene2
32
-
33
- assert_equal gene1, a[gene1.range]
34
- assert_equal gene2, a[gene2.range]
35
-
36
- c = a.dup
37
-
38
- c[gene2.range] = "GN"
39
- assert_equal c, Transformed.transform(a,[gene2], "GN")
40
- c[gene1.range] = "GN"
41
- assert_equal c, Transformed.transform(a,[gene1], "GN")
42
-
43
- iii a.transformation_offset_differences
44
- raise
45
- assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
46
- assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
47
-
48
-
49
- gene3 = "GN gene"
50
- gene3.extend Segment
51
- gene3.offset = a.index gene3
52
-
53
- assert_equal gene3, a[gene3.range]
31
+ def test_with_transform
32
+ text = <<-EOF
33
+ More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
34
+ EOF
54
35
 
55
- a.restore([gene3])
56
- assert_equal original, a
57
- assert_equal "TP53 gene", a[gene3.range]
36
+ entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].reverse.collect do |literal|
37
+ NamedEntity.setup(literal, :offset => text.index(literal))
38
+ end
58
39
 
40
+ Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
41
+ assert text.include? "such as [IL-2]"
42
+ end
59
43
  end
60
44
 
61
- def test_with_transform
45
+ def test_with_transform_2
62
46
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
63
47
  original = a.dup
64
48
 
@@ -123,12 +107,12 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
123
107
  gene1 = "TP53"
124
108
  gene1.extend NamedEntity
125
109
  gene1.offset = a.index gene1
126
- gene1.type = "Gene"
110
+ gene1.entity_type = "Gene"
127
111
 
128
112
  gene2 = "CDK5R1"
129
113
  gene2.extend NamedEntity
130
114
  gene2.offset = a.index gene2
131
- gene2.type = "Protein"
115
+ gene2.entity_type = "Protein"
132
116
 
133
117
  Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
134
118
  assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -143,13 +127,13 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
143
127
  gene1.extend NamedEntity
144
128
  gene1.offset = a.index gene1
145
129
  gene1.offset += 10
146
- gene1.type = "Gene"
130
+ gene1.entity_type = "Gene"
147
131
 
148
132
  gene2 = "CDK5R1"
149
133
  gene2.extend NamedEntity
150
134
  gene2.offset = a.index gene2
151
135
  gene2.offset += 10
152
- gene2.type = "Protein"
136
+ gene2.entity_type = "Protein"
153
137
 
154
138
  Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
155
139
  assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -162,12 +146,12 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
162
146
  gene1 = "TP53"
163
147
  gene1.extend NamedEntity
164
148
  gene1.offset = a.index gene1
165
- gene1.type = "Gene"
149
+ gene1.entity_type = "Gene"
166
150
 
167
151
  gene2 = "TP53 gene"
168
152
  gene2.extend NamedEntity
169
153
  gene2.offset = a.index gene2
170
- gene2.type = "Expanded Gene"
154
+ gene2.entity_type = "Expanded Gene"
171
155
 
172
156
  assert_equal [gene1], Segment.overlaps(Segment.sort([gene1,gene2]))
173
157
 
@@ -379,5 +363,46 @@ This is another sentence. Among the nonstructural proteins, the leader protein (
379
363
  end
380
364
  end
381
365
  end
366
+
367
+ def ___test_transform
368
+ a = "This sentence mentions the TP53 gene and the CDK5 protein"
369
+ original = a.dup
370
+
371
+ gene1 = "TP53"
372
+ gene1.extend Segment
373
+ gene1.offset = a.index gene1
374
+
375
+ gene2 = "CDK5"
376
+ gene2.extend Segment
377
+ gene2.offset = a.index gene2
378
+
379
+ assert_equal gene1, a[gene1.range]
380
+ assert_equal gene2, a[gene2.range]
381
+
382
+ c = a.dup
383
+
384
+ c[gene2.range] = "GN"
385
+ assert_equal c, Transformed.transform(a,[gene2], "GN")
386
+ c[gene1.range] = "GN"
387
+ assert_equal c, Transformed.transform(a,[gene1], "GN")
388
+
389
+ iii a.transformation_offset_differences
390
+ raise
391
+ assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
392
+ assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
393
+
394
+
395
+ gene3 = "GN gene"
396
+ gene3.extend Segment
397
+ gene3.offset = a.index gene3
398
+
399
+ assert_equal gene3, a[gene3.range]
400
+
401
+ a.restore([gene3])
402
+ assert_equal original, a
403
+ assert_equal "TP53 gene", a[gene3.range]
404
+
405
+ end
406
+
382
407
  end
383
408