rbbt-text 1.1.9 → 1.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/bow/bow.rb +5 -2
  3. data/lib/rbbt/bow/dictionary.rb +27 -23
  4. data/lib/rbbt/document.rb +56 -0
  5. data/lib/rbbt/document/annotation.rb +45 -0
  6. data/lib/rbbt/document/corpus.rb +61 -0
  7. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  8. data/lib/rbbt/ner/NER.rb +3 -3
  9. data/lib/rbbt/ner/abner.rb +1 -1
  10. data/lib/rbbt/ner/banner.rb +1 -1
  11. data/lib/rbbt/ner/brat.rb +1 -1
  12. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  13. data/lib/rbbt/ner/g_norm_plus.rb +42 -12
  14. data/lib/rbbt/ner/linnaeus.rb +3 -3
  15. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  16. data/lib/rbbt/ner/oscar3.rb +1 -2
  17. data/lib/rbbt/ner/oscar4.rb +3 -3
  18. data/lib/rbbt/ner/patterns.rb +5 -5
  19. data/lib/rbbt/ner/regexpNER.rb +1 -2
  20. data/lib/rbbt/ner/token_trieNER.rb +35 -22
  21. data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
  22. data/lib/rbbt/nlp/nlp.rb +5 -5
  23. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  24. data/lib/rbbt/nlp/spaCy.rb +52 -0
  25. data/lib/rbbt/segment.rb +179 -0
  26. data/lib/rbbt/segment/annotation.rb +58 -0
  27. data/lib/rbbt/segment/encoding.rb +18 -0
  28. data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -10
  29. data/lib/rbbt/segment/overlaps.rb +63 -0
  30. data/lib/rbbt/segment/range_index.rb +35 -0
  31. data/lib/rbbt/segment/relationship.rb +7 -0
  32. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  33. data/lib/rbbt/segment/token.rb +23 -0
  34. data/lib/rbbt/{text/segment → segment}/transformed.rb +10 -8
  35. data/lib/rbbt/segment/tsv.rb +41 -0
  36. data/share/install/software/Linnaeus +1 -1
  37. data/share/install/software/OpenNLP +1 -1
  38. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  39. data/test/rbbt/document/test_annotation.rb +140 -0
  40. data/test/rbbt/document/test_corpus.rb +33 -0
  41. data/test/rbbt/ner/test_finder.rb +3 -3
  42. data/test/rbbt/ner/test_g_norm_plus.rb +20 -3
  43. data/test/rbbt/ner/test_patterns.rb +9 -9
  44. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  45. data/test/rbbt/ner/test_rnorm.rb +3 -4
  46. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  47. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
  48. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  49. data/test/rbbt/segment/test_annotation.rb +39 -0
  50. data/test/rbbt/segment/test_corpus.rb +36 -0
  51. data/test/rbbt/segment/test_encoding.rb +24 -0
  52. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
  53. data/test/rbbt/segment/test_overlaps.rb +69 -0
  54. data/test/rbbt/segment/test_range_index.rb +42 -0
  55. data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
  56. data/test/rbbt/test_document.rb +14 -0
  57. data/test/rbbt/test_segment.rb +182 -0
  58. data/test/test_helper.rb +5 -3
  59. data/test/test_spaCy.rb +32 -0
  60. metadata +44 -32
  61. data/lib/rbbt/text/corpus.rb +0 -106
  62. data/lib/rbbt/text/corpus/document.rb +0 -361
  63. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  64. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  65. data/lib/rbbt/text/document.rb +0 -39
  66. data/lib/rbbt/text/segment.rb +0 -355
  67. data/lib/rbbt/text/segment/docid.rb +0 -46
  68. data/lib/rbbt/text/segment/relationship.rb +0 -24
  69. data/lib/rbbt/text/segment/token.rb +0 -49
  70. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  71. data/test/rbbt/text/corpus/test_document.rb +0 -52
  72. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  73. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  74. data/test/rbbt/text/test_corpus.rb +0 -34
  75. data/test/rbbt/text/test_document.rb +0 -58
  76. data/test/rbbt/text/test_segment.rb +0 -100
@@ -27,10 +27,9 @@ S000000376 AAA GENE1 DDD
27
27
  assert_equal(["S000000029"], @norm.match("FUN21"))
28
28
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
29
29
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
30
- assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
31
- assert_equal([], @norm.match("GER4"))
32
-
33
- @norm.match("FUN21")
30
+ assert_equal(["S000000029"].sort, @norm.match("FUN 21").sort)
31
+ assert_equal([], @norm.match("Non-sense"))
32
+ assert_equal(["S000000029", "S000000374"], @norm.match("GER4"))
34
33
  end
35
34
 
36
35
  def test_select
@@ -74,6 +74,7 @@ C2;11;22;3 3;bb
74
74
  index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
75
75
 
76
76
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
+ assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
78
  end
78
79
  end
79
80
 
@@ -1,9 +1,43 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/genia/sentence_splitter'
3
3
 
4
- class TestClass < Test::Unit::TestCase
5
- def test_true
6
- assert true
4
+ class TestNLP < Test::Unit::TestCase
5
+ def test_sentences
6
+ text =<<-EOF
7
+ This is a sentence.
8
+ A funky character ™ in a sentence.
9
+ This is a sentence.
10
+ This is a broken
11
+ sentence. This is
12
+ another broken sentence.
13
+ EOF
14
+
15
+ iii NLP.geniass_sentence_splitter(text)
16
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
17
+ end
18
+
19
+ def test_sentences_2
20
+ text =<<-EOF
21
+ This is a sentence.
22
+ This is a sentence.
23
+ This is a broken
24
+ sentence. This is
25
+ another broken sentence.
26
+ EOF
27
+
28
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
29
+ end
30
+
31
+ def test_sentences_ext
32
+ text =<<-EOF
33
+ This is a sentence.
34
+ This is a sentence.
35
+ This is a broken
36
+ sentence. This is
37
+ another broken sentence.
38
+ EOF
39
+
40
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter_extension(text)[2].strip
7
41
  end
8
42
  end
9
43
 
@@ -1,6 +1,6 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/open_nlp/sentence_splitter'
3
- require 'rbbt/ner/segment'
3
+ require 'rbbt/segment'
4
4
 
5
5
  $text=<<-EOF
6
6
  Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
@@ -22,6 +22,22 @@ class TestClass < Test::Unit::TestCase
22
22
  def test_sentences
23
23
  text =<<-EOF
24
24
  This is a sentence.
25
+ No funky character in this sentence.
26
+ This is a sentence.
27
+ This is a
28
+ sentence. This is
29
+ another sentence.
30
+ EOF
31
+
32
+ assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
33
+
34
+ assert_equal 5, OpenNLP.sentence_splitter(text).length
35
+ assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
+ end
37
+
38
+ def test_sentences_fix_utf8
39
+ text =<<-EOF
40
+ This is a sentence.
25
41
  A funky character ™ in a sentence.
26
42
  This is a sentence.
27
43
  This is a
@@ -35,12 +51,12 @@ another sentence.
35
51
  assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
52
  end
37
53
 
38
- def _test_text_sentences
54
+ def test_text_sentences
39
55
  Misc.benchmark(100) do
40
- OpenNLP.sentence_splitter($text).include? "Our
56
+ assert OpenNLP.sentence_splitter($text).include?("Our
41
57
  findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
42
58
  AT/RT and the usefulness of antibodies directed against SMARCA4 in this
43
- diagnostic setting."
59
+ diagnostic setting.")
44
60
  end
45
61
  end
46
62
  end
@@ -0,0 +1,39 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/annotation'
6
+
7
+ class TestAnnotation < Test::Unit::TestCase
8
+ def test_annotation
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
13
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
14
+
15
+ assert_equal 'verb', annotation.annotid.split(":")[5]
16
+
17
+ annotation = SegmentAnnotation.setup(segment.segid, :type => :verb)
18
+ assert_equal 'verb', annotation.annotid.split(":")[5]
19
+ end
20
+
21
+ def test_annotid
22
+ text = "This is a document"
23
+ Document.setup(text, "TEST", "test_doc1", nil)
24
+
25
+ corpus = Document::Corpus.setup({})
26
+
27
+ corpus.add_document(text)
28
+
29
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
30
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
31
+
32
+ annotid = annotation.annotid(corpus)
33
+
34
+ assert_equal 'verb', annotid.type
35
+ assert_equal 'verb', annotid.annotation.type
36
+ assert_equal 'is', annotid.annotation
37
+ end
38
+ end
39
+
@@ -0,0 +1,36 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/corpus'
6
+
7
+ class TestSegmentCorpus < Test::Unit::TestCase
8
+ def test_corpus
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = {}
13
+ corpus.extend Document::Corpus
14
+
15
+ corpus.add_document(text)
16
+
17
+ docid = text.docid(corpus)
18
+
19
+ assert_equal docid.document, text
20
+ end
21
+
22
+ def test_find
23
+ text = "This is a document"
24
+ Document.setup(text, "TEST", "test_doc1", nil)
25
+
26
+ TmpFile.with_file do |path|
27
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
28
+ corpus.extend Document::Corpus
29
+
30
+ corpus.add_document(text)
31
+
32
+ assert corpus.prefix("TEST:").include?(text.docid)
33
+ end
34
+ end
35
+ end
36
+
@@ -0,0 +1,24 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/encoding'
3
+
4
+ class TestEncoding < Test::Unit::TestCase
5
+ def test_bad_chars
6
+ text = "A funky character ™ in a sentence."
7
+
8
+ assert_equal ["™"], Segment.bad_chars(text)
9
+ end
10
+
11
+ def test_ascii
12
+ text = "A funky character ™ in a sentence."
13
+
14
+ Segment.ascii(text) do
15
+ assert_equal "A funky character ? in a sentence.", text
16
+ end
17
+
18
+ Segment.ascii(text, "NONASCII") do
19
+ assert_equal "A funky character NONASCII in a sentence.", text
20
+ end
21
+
22
+ assert_equal "A funky character ™ in a sentence.", text
23
+ end
24
+ end
@@ -1,6 +1,6 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_info
@@ -15,35 +15,39 @@ class TestClass < Test::Unit::TestCase
15
15
 
16
16
  def test_all_args
17
17
  a = "test"
18
- NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
18
+ NamedEntity.setup a, 10, "TEST:doc1:test_type:hash", "NamedEntity", "TYPE", "CODE", "SCORE"
19
19
  assert_equal 10, a.offset
20
+ assert_equal "NamedEntity", a.type
21
+ assert_equal "TYPE", a.entity_type
22
+ assert_equal "SCORE", a.score
20
23
  end
21
24
 
22
25
  def test_tsv
23
26
  a = "test"
24
27
  NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
25
- assert Segment.tsv([a]).fields.include? "code"
26
- assert Segment.tsv([a], nil).fields.include? "code"
27
- assert Segment.tsv([a], "literal").fields.include? "code"
28
+ assert Annotated.tsv([a]).fields.include? "code"
29
+ assert Annotated.tsv([a], nil).fields.include? "code"
30
+ assert Annotated.tsv([a], :all).fields.include? "code"
31
+ assert Annotated.tsv([a], :all).fields.include? "literal"
28
32
  end
29
33
 
30
- def test_segment_brat
34
+ def __test_segment_brat
31
35
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
32
36
 
33
37
  gene1 = "TP53"
34
38
  gene1.extend NamedEntity
35
39
  gene1.offset = a.index gene1
36
- gene1.type = "Gene"
40
+ gene1.entity_type = "Gene"
37
41
 
38
42
  gene2 = "CDK5R1"
39
43
  gene2.extend NamedEntity
40
44
  gene2.offset = a.index gene2
41
- gene2.type = "Gene"
45
+ gene2.entity_type = "Gene"
42
46
 
43
47
  gene3 = "TP53 gene"
44
48
  gene3.extend NamedEntity
45
49
  gene3.offset = a.index gene3
46
- gene3.type = "Gene"
50
+ gene3.entity_type = "Gene"
47
51
 
48
52
  segments = [gene1, gene2, gene3]
49
53
  assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"
@@ -0,0 +1,69 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/overlaps'
4
+
5
+ class TestOverlaps < Test::Unit::TestCase
6
+ def setup
7
+ @text = <<-EOF
8
+ This is a first sentence. More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
9
+ EOF
10
+
11
+ @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
12
+ Segment.setup(literal, :offset => @text.index(literal))
13
+ end
14
+
15
+ @sentences = @text.partition(".").values_at(0, 2).collect do |sentence|
16
+ Segment.setup sentence, :offset => @text.index(sentence)
17
+ end
18
+ end
19
+
20
+ def test_make_relative
21
+ sentence = @sentences[1]
22
+
23
+ @entities.each do |e|
24
+ assert_equal e, @text[e.range]
25
+ end
26
+
27
+ sentence.make_relative @entities do
28
+ @entities.each do |e|
29
+ assert_equal e, sentence[e.range]
30
+ end
31
+
32
+ @entities.each do |e|
33
+ assert_not_equal e, @text[e.range]
34
+ end
35
+ end
36
+
37
+ @entities.each do |e|
38
+ assert_equal e, @text[e.range]
39
+ end
40
+ end
41
+
42
+ def test_range_in
43
+ sentence = @sentences[1]
44
+
45
+ @entities.each do |e|
46
+ assert_equal e.range_in(sentence).begin, sentence.index(e)
47
+ assert_equal e.range.begin - sentence.offset, sentence.index(e)
48
+ end
49
+ end
50
+
51
+ def test_includes
52
+ @entities.each do |e|
53
+ assert ! @sentences[0].include?(e)
54
+ assert @sentences[1].include?(e)
55
+ assert ! e.include?(@sentences[0])
56
+ assert ! e.include?(@sentences[1])
57
+ end
58
+ end
59
+
60
+ def test_overlaps?
61
+ @entities.each do |e|
62
+ assert ! @sentences[0].overlaps?(e)
63
+ assert @sentences[1].overlaps?(e)
64
+ assert ! e.overlaps?(@sentences[0])
65
+ assert e.overlaps?(@sentences[1])
66
+ end
67
+ end
68
+
69
+ end
@@ -0,0 +1,42 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/range_index'
6
+
7
+ class TestRangeIndex < Test::Unit::TestCase
8
+ def test_segment_index
9
+ text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = Document::Corpus.setup({})
13
+
14
+ corpus.add_document(text)
15
+
16
+ gene1 = "TP53"
17
+ gene1.extend Segment
18
+ gene1.offset = text.index gene1
19
+ gene1.docid = text.docid
20
+
21
+ gene2 = "CDK5R1"
22
+ gene2.extend Segment
23
+ gene2.offset = text.index gene2
24
+ gene2.docid = text.docid
25
+
26
+ gene3 = "TP53 gene"
27
+ gene3.extend Segment
28
+ gene3.offset = text.index gene1
29
+ gene3.docid = text.docid
30
+
31
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus)
32
+ assert_equal "CDK5R1", index[gene2.offset + 1].segment.first
33
+
34
+ TmpFile.with_file do |fwt|
35
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
36
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
37
+ index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
38
+ assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
39
+ end
40
+ end
41
+ end
42
+
@@ -1,10 +1,21 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment/transformed'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/transformed'
3
+ require 'rbbt/segment/named_entity'
4
4
  require 'rexml/document'
5
5
 
6
- class TestClass < Test::Unit::TestCase
7
- def test_sort
6
+ class TestTransformed < Test::Unit::TestCase
7
+
8
+ def setup
9
+ @text = <<-EOF
10
+ More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
11
+ EOF
12
+
13
+ @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
14
+ NamedEntity.setup(literal, :offset => @text.index(literal))
15
+ end
16
+ end
17
+
18
+ def test_transform
8
19
  text = <<-EOF
9
20
  More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
10
21
  EOF
@@ -13,52 +24,25 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
13
24
  NamedEntity.setup(literal, :offset => text.index(literal))
14
25
  end
15
26
 
16
- Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
17
- assert text.include? "such as [IL-2]"
18
- end
27
+ Transformed.transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" })
28
+ assert text.include? "such as [IL-2]"
19
29
  end
20
30
 
21
- def ___test_transform
22
- a = "This sentence mentions the TP53 gene and the CDK5 protein"
23
- original = a.dup
24
-
25
- gene1 = "TP53"
26
- gene1.extend Segment
27
- gene1.offset = a.index gene1
28
-
29
- gene2 = "CDK5"
30
- gene2.extend Segment
31
- gene2.offset = a.index gene2
32
-
33
- assert_equal gene1, a[gene1.range]
34
- assert_equal gene2, a[gene2.range]
35
-
36
- c = a.dup
37
-
38
- c[gene2.range] = "GN"
39
- assert_equal c, Transformed.transform(a,[gene2], "GN")
40
- c[gene1.range] = "GN"
41
- assert_equal c, Transformed.transform(a,[gene1], "GN")
42
-
43
- iii a.transformation_offset_differences
44
- raise
45
- assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
46
- assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
47
-
48
-
49
- gene3 = "GN gene"
50
- gene3.extend Segment
51
- gene3.offset = a.index gene3
52
-
53
- assert_equal gene3, a[gene3.range]
31
+ def test_with_transform
32
+ text = <<-EOF
33
+ More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
34
+ EOF
54
35
 
55
- a.restore([gene3])
56
- assert_equal original, a
57
- assert_equal "TP53 gene", a[gene3.range]
36
+ entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].reverse.collect do |literal|
37
+ NamedEntity.setup(literal, :offset => text.index(literal))
38
+ end
58
39
 
40
+ Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
41
+ assert text.include? "such as [IL-2]"
42
+ end
59
43
  end
60
44
 
61
- def test_with_transform
45
+ def test_with_transform_2
62
46
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
63
47
  original = a.dup
64
48
 
@@ -117,18 +101,47 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
117
101
  assert_equal "CDK5R1 protein", exp2
118
102
  end
119
103
 
104
+ def test_with_transform_sentences
105
+ a = "This first sentence mentions Bread. This sentence mentions the TP53 gene and the CDK5R1 protein"
106
+ original = a.dup
107
+
108
+ gene1 = "TP53"
109
+ gene1.extend NamedEntity
110
+ gene1.offset = a.index gene1
111
+
112
+ gene2 = "CDK5R1"
113
+ gene2.extend NamedEntity
114
+ gene2.offset = a.index gene2
115
+
116
+ bread = "Bread"
117
+ bread.extend NamedEntity
118
+ bread.offset = a.index bread
119
+
120
+ sentences = Segment.align(a, a.split(". "))
121
+
122
+ Transformed.with_transform(sentences[1], [gene1, gene2, bread], "GN") do
123
+ assert sentences[1].include?("GN gene and the GN protein")
124
+ end
125
+
126
+ Transformed.with_transform(sentences[0], [gene1, gene2, bread], "BR") do
127
+ assert sentences[0].include?("first sentence mentions BR")
128
+ end
129
+
130
+
131
+ end
132
+
120
133
  def test_html
121
134
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
122
135
 
123
136
  gene1 = "TP53"
124
137
  gene1.extend NamedEntity
125
138
  gene1.offset = a.index gene1
126
- gene1.type = "Gene"
139
+ gene1.entity_type = "Gene"
127
140
 
128
141
  gene2 = "CDK5R1"
129
142
  gene2.extend NamedEntity
130
143
  gene2.offset = a.index gene2
131
- gene2.type = "Protein"
144
+ gene2.entity_type = "Protein"
132
145
 
133
146
  Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
134
147
  assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -143,13 +156,13 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
143
156
  gene1.extend NamedEntity
144
157
  gene1.offset = a.index gene1
145
158
  gene1.offset += 10
146
- gene1.type = "Gene"
159
+ gene1.entity_type = "Gene"
147
160
 
148
161
  gene2 = "CDK5R1"
149
162
  gene2.extend NamedEntity
150
163
  gene2.offset = a.index gene2
151
164
  gene2.offset += 10
152
- gene2.type = "Protein"
165
+ gene2.entity_type = "Protein"
153
166
 
154
167
  Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
155
168
  assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -162,12 +175,12 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
162
175
  gene1 = "TP53"
163
176
  gene1.extend NamedEntity
164
177
  gene1.offset = a.index gene1
165
- gene1.type = "Gene"
178
+ gene1.entity_type = "Gene"
166
179
 
167
180
  gene2 = "TP53 gene"
168
181
  gene2.extend NamedEntity
169
182
  gene2.offset = a.index gene2
170
- gene2.type = "Expanded Gene"
183
+ gene2.entity_type = "Expanded Gene"
171
184
 
172
185
  assert_equal [gene1], Segment.overlaps(Segment.sort([gene1,gene2]))
173
186
 
@@ -379,5 +392,46 @@ This is another sentence. Among the nonstructural proteins, the leader protein (
379
392
  end
380
393
  end
381
394
  end
395
+
396
+ def ___test_transform
397
+ a = "This sentence mentions the TP53 gene and the CDK5 protein"
398
+ original = a.dup
399
+
400
+ gene1 = "TP53"
401
+ gene1.extend Segment
402
+ gene1.offset = a.index gene1
403
+
404
+ gene2 = "CDK5"
405
+ gene2.extend Segment
406
+ gene2.offset = a.index gene2
407
+
408
+ assert_equal gene1, a[gene1.range]
409
+ assert_equal gene2, a[gene2.range]
410
+
411
+ c = a.dup
412
+
413
+ c[gene2.range] = "GN"
414
+ assert_equal c, Transformed.transform(a,[gene2], "GN")
415
+ c[gene1.range] = "GN"
416
+ assert_equal c, Transformed.transform(a,[gene1], "GN")
417
+
418
+ iii a.transformation_offset_differences
419
+ raise
420
+ assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
421
+ assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
422
+
423
+
424
+ gene3 = "GN gene"
425
+ gene3.extend Segment
426
+ gene3.offset = a.index gene3
427
+
428
+ assert_equal gene3, a[gene3.range]
429
+
430
+ a.restore([gene3])
431
+ assert_equal original, a
432
+ assert_equal "TP53 gene", a[gene3.range]
433
+
434
+ end
435
+
382
436
  end
383
437