RubyGems - rbbt-text - Versions diffs - 1.1.9 → 1.3.3 - Mend

rbbt-text 1.1.9 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76)

checksums.yaml +4 -4
data/lib/rbbt/bow/bow.rb +5 -2
data/lib/rbbt/bow/dictionary.rb +27 -23
data/lib/rbbt/document.rb +56 -0
data/lib/rbbt/document/annotation.rb +45 -0
data/lib/rbbt/document/corpus.rb +61 -0
data/lib/rbbt/document/corpus/pubmed.rb +33 -0
data/lib/rbbt/ner/NER.rb +3 -3
data/lib/rbbt/ner/abner.rb +1 -1
data/lib/rbbt/ner/banner.rb +1 -1
data/lib/rbbt/ner/brat.rb +1 -1
data/lib/rbbt/ner/chemical_tagger.rb +1 -2
data/lib/rbbt/ner/g_norm_plus.rb +42 -12
data/lib/rbbt/ner/linnaeus.rb +3 -3
data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
data/lib/rbbt/ner/oscar3.rb +1 -2
data/lib/rbbt/ner/oscar4.rb +3 -3
data/lib/rbbt/ner/patterns.rb +5 -5
data/lib/rbbt/ner/regexpNER.rb +1 -2
data/lib/rbbt/ner/token_trieNER.rb +35 -22
data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
data/lib/rbbt/nlp/nlp.rb +5 -5
data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
data/lib/rbbt/nlp/spaCy.rb +52 -0
data/lib/rbbt/segment.rb +179 -0
data/lib/rbbt/segment/annotation.rb +58 -0
data/lib/rbbt/segment/encoding.rb +18 -0
data/lib/rbbt/{text/segment → segment}/named_entity.rb +11 -10
data/lib/rbbt/segment/overlaps.rb +63 -0
data/lib/rbbt/segment/range_index.rb +35 -0
data/lib/rbbt/segment/relationship.rb +7 -0
data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
data/lib/rbbt/segment/token.rb +23 -0
data/lib/rbbt/{text/segment → segment}/transformed.rb +10 -8
data/lib/rbbt/segment/tsv.rb +41 -0
data/share/install/software/Linnaeus +1 -1
data/share/install/software/OpenNLP +1 -1
data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
data/test/rbbt/document/test_annotation.rb +140 -0
data/test/rbbt/document/test_corpus.rb +33 -0
data/test/rbbt/ner/test_finder.rb +3 -3
data/test/rbbt/ner/test_g_norm_plus.rb +20 -3
data/test/rbbt/ner/test_patterns.rb +9 -9
data/test/rbbt/ner/test_regexpNER.rb +14 -14
data/test/rbbt/ner/test_rnorm.rb +3 -4
data/test/rbbt/ner/test_token_trieNER.rb +1 -0
data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
data/test/rbbt/segment/test_annotation.rb +39 -0
data/test/rbbt/segment/test_corpus.rb +36 -0
data/test/rbbt/segment/test_encoding.rb +24 -0
data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
data/test/rbbt/segment/test_overlaps.rb +69 -0
data/test/rbbt/segment/test_range_index.rb +42 -0
data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
data/test/rbbt/test_document.rb +14 -0
data/test/rbbt/test_segment.rb +182 -0
data/test/test_helper.rb +5 -3
data/test/test_spaCy.rb +32 -0
metadata +44 -32
data/lib/rbbt/text/corpus.rb +0 -106
data/lib/rbbt/text/corpus/document.rb +0 -361
data/lib/rbbt/text/corpus/document_repo.rb +0 -68
data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
data/lib/rbbt/text/document.rb +0 -39
data/lib/rbbt/text/segment.rb +0 -355
data/lib/rbbt/text/segment/docid.rb +0 -46
data/lib/rbbt/text/segment/relationship.rb +0 -24
data/lib/rbbt/text/segment/token.rb +0 -49
data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
data/test/rbbt/text/corpus/test_document.rb +0 -52
data/test/rbbt/text/segment/test_relationship.rb +0 -0
data/test/rbbt/text/segment/test_segmented.rb +0 -23
data/test/rbbt/text/test_corpus.rb +0 -34
data/test/rbbt/text/test_document.rb +0 -58
data/test/rbbt/text/test_segment.rb +0 -100

data/test/rbbt/ner/test_rnorm.rb CHANGED

@@ -27,10 +27,9 @@ S000000376	AAA	GENE1	DDD
      assert_equal(["S000000029"], @norm.match("FUN21"))
      assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
      assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
-     assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
-     assert_equal([], @norm.match("GER4"))
-     @norm.match("FUN21")
+     assert_equal(["S000000029"].sort, @norm.match("FUN 21").sort)
+     assert_equal([], @norm.match("Non-sense"))
+     assert_equal(["S000000029", "S000000374"], @norm.match("GER4"))
   end
   def test_select

data/test/rbbt/ner/test_token_trieNER.rb CHANGED

@@ -74,6 +74,7 @@ C2;11;22;3 3;bb
       index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
       assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
+      assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
     end
   end

data/test/rbbt/nlp/genia/test_sentence_splitter.rb CHANGED

@@ -1,9 +1,43 @@
 require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
 require 'rbbt/nlp/genia/sentence_splitter'
-class TestClass < Test::Unit::TestCase
-  def test_true
-    assert true
+class TestNLP < Test::Unit::TestCase
+  def test_sentences
+    text =<<-EOF
+This is a sentence.
+A funky character ™ in a sentence.
+This is a sentence.
+This is a broken
+sentence. This is
+another broken sentence.
+    EOF
+    iii NLP.geniass_sentence_splitter(text)
+    assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
+  end
+  def test_sentences_2
+    text =<<-EOF
+This is a sentence.
+This is a sentence.
+This is a broken
+sentence. This is
+another broken sentence.
+    EOF
+    assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
+  end
+  def test_sentences_ext
+    text =<<-EOF
+This is a sentence.
+This is a sentence.
+This is a broken
+sentence. This is
+another broken sentence.
+    EOF
+    assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter_extension(text)[2].strip
   end
 end

data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb CHANGED

@@ -1,6 +1,6 @@
 require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
 require 'rbbt/nlp/open_nlp/sentence_splitter'
-require 'rbbt/ner/segment'
+require 'rbbt/segment'
 $text=<<-EOF
 Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
@@ -22,6 +22,22 @@ class TestClass < Test::Unit::TestCase
   def test_sentences
     text =<<-EOF
 This is a sentence.
+No funky character in this sentence.
+This is a sentence.
+This is a
+sentence. This is
+another sentence.
+    EOF
+    assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
+    assert_equal 5, OpenNLP.sentence_splitter(text).length
+    assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
+  end
+  def test_sentences_fix_utf8
+    text =<<-EOF
+This is a sentence.
 A funky character ™ in a sentence.
 This is a sentence.
 This is a
@@ -35,12 +51,12 @@ another sentence.
     assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
   end
-  def _test_text_sentences
+  def test_text_sentences
     Misc.benchmark(100) do
-      OpenNLP.sentence_splitter($text).include? "Our
+      assert OpenNLP.sentence_splitter($text).include?("Our
 findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
 AT/RT and the usefulness of antibodies directed against SMARCA4 in this
-diagnostic setting."
+diagnostic setting.")
     end
   end
 end

data/test/rbbt/segment/test_annotation.rb ADDED

@@ -0,0 +1,39 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
+require 'rbbt/document'
+require 'rbbt/document/corpus'
+require 'rbbt/segment'
+require 'rbbt/segment/annotation'
+class TestAnnotation < Test::Unit::TestCase
+  def test_annotation
+    text = "This is a document"
+    Document.setup(text, "TEST", "test_doc1", nil)
+    segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
+    annotation = SegmentAnnotation.setup(segment, :type => :verb)
+    assert_equal 'verb', annotation.annotid.split(":")[5]
+    annotation = SegmentAnnotation.setup(segment.segid, :type => :verb)
+    assert_equal 'verb', annotation.annotid.split(":")[5]
+  end
+  def test_annotid
+    text = "This is a document"
+    Document.setup(text, "TEST", "test_doc1", nil)
+    corpus = Document::Corpus.setup({})
+    corpus.add_document(text)
+    segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
+    annotation = SegmentAnnotation.setup(segment, :type => :verb)
+    annotid = annotation.annotid(corpus)
+    assert_equal 'verb', annotid.type
+    assert_equal 'verb', annotid.annotation.type
+    assert_equal 'is', annotid.annotation
+  end
+end

data/test/rbbt/segment/test_corpus.rb ADDED

@@ -0,0 +1,36 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
+require 'rbbt/document'
+require 'rbbt/document/corpus'
+require 'rbbt/segment'
+require 'rbbt/segment/corpus'
+class TestSegmentCorpus < Test::Unit::TestCase
+  def test_corpus
+    text = "This is a document"
+    Document.setup(text, "TEST", "test_doc1", nil)
+    corpus = {}
+    corpus.extend Document::Corpus
+    corpus.add_document(text)
+    docid = text.docid(corpus)
+    assert_equal docid.document, text
+  end
+  def test_find
+    text = "This is a document"
+    Document.setup(text, "TEST", "test_doc1", nil)
+    TmpFile.with_file do |path|
+      corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
+      corpus.extend Document::Corpus
+      corpus.add_document(text)
+      assert corpus.prefix("TEST:").include?(text.docid)
+    end
+  end
+end

data/test/rbbt/segment/test_encoding.rb ADDED

@@ -0,0 +1,24 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
+require 'rbbt/segment/encoding'
+class TestEncoding < Test::Unit::TestCase
+  def test_bad_chars
+    text = "A funky character ™ in a sentence."
+    assert_equal ["™"], Segment.bad_chars(text)
+  end
+  def test_ascii
+    text = "A funky character ™ in a sentence."
+    Segment.ascii(text) do
+      assert_equal "A funky character ? in a sentence.",  text
+    end
+    Segment.ascii(text, "NONASCII") do
+      assert_equal "A funky character NONASCII in a sentence.",  text
+    end
+    assert_equal "A funky character ™ in a sentence.",  text
+  end
+end

data/test/rbbt/{text/segment → segment}/test_named_entity.rb RENAMED

@@ -1,6 +1,6 @@
-require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
-require 'rbbt/text/segment'
-require 'rbbt/text/segment/named_entity'
+require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
+require 'rbbt/segment'
+require 'rbbt/segment/named_entity'
 class TestClass < Test::Unit::TestCase
   def test_info
@@ -15,35 +15,39 @@ class TestClass < Test::Unit::TestCase
   def test_all_args
     a = "test"
-    NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
+    NamedEntity.setup a, 10, "TEST:doc1:test_type:hash", "NamedEntity", "TYPE", "CODE", "SCORE"
     assert_equal 10, a.offset
+    assert_equal "NamedEntity", a.type
+    assert_equal "TYPE", a.entity_type
+    assert_equal "SCORE", a.score
   end
   def test_tsv
     a = "test"
     NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
-    assert Segment.tsv([a]).fields.include? "code"
-    assert Segment.tsv([a], nil).fields.include? "code"
-    assert Segment.tsv([a], "literal").fields.include? "code"
+    assert Annotated.tsv([a]).fields.include? "code"
+    assert Annotated.tsv([a], nil).fields.include? "code"
+    assert Annotated.tsv([a], :all).fields.include? "code"
+    assert Annotated.tsv([a], :all).fields.include? "literal"
   end
-  def test_segment_brat
+  def __test_segment_brat
     a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
     gene1 = "TP53"
     gene1.extend NamedEntity
     gene1.offset = a.index gene1
-    gene1.type = "Gene"
+    gene1.entity_type = "Gene"
     gene2 = "CDK5R1"
     gene2.extend NamedEntity
     gene2.offset = a.index gene2
-    gene2.type = "Gene"
+    gene2.entity_type = "Gene"
     gene3 = "TP53 gene"
     gene3.extend NamedEntity
     gene3.offset = a.index gene3
-    gene3.type = "Gene"
+    gene3.entity_type = "Gene"
     segments = [gene1, gene2, gene3]
     assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"

data/test/rbbt/segment/test_overlaps.rb ADDED

@@ -0,0 +1,69 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
+require 'rbbt/segment'
+require 'rbbt/segment/overlaps'
+class TestOverlaps < Test::Unit::TestCase
+  def setup
+    @text = <<-EOF
+This is a first sentence. More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
+    EOF
+    @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
+      Segment.setup(literal, :offset => @text.index(literal))
+    end
+    @sentences = @text.partition(".").values_at(0, 2).collect do |sentence|
+      Segment.setup sentence, :offset => @text.index(sentence)
+    end
+  end
+  def test_make_relative
+    sentence = @sentences[1]
+    @entities.each do |e|
+      assert_equal e, @text[e.range]
+    end
+    sentence.make_relative @entities do
+      @entities.each do |e|
+        assert_equal e, sentence[e.range]
+      end
+      @entities.each do |e|
+        assert_not_equal e, @text[e.range]
+      end
+    end
+    @entities.each do |e|
+      assert_equal e, @text[e.range]
+    end
+  end
+  def test_range_in
+    sentence = @sentences[1]
+    @entities.each do |e|
+      assert_equal e.range_in(sentence).begin, sentence.index(e)
+      assert_equal e.range.begin - sentence.offset, sentence.index(e)
+    end
+  end
+  def test_includes
+    @entities.each do |e|
+      assert ! @sentences[0].include?(e)
+      assert @sentences[1].include?(e)
+      assert ! e.include?(@sentences[0])
+      assert ! e.include?(@sentences[1])
+    end
+  end
+  def test_overlaps?
+    @entities.each do |e|
+      assert ! @sentences[0].overlaps?(e)
+      assert @sentences[1].overlaps?(e)
+      assert ! e.overlaps?(@sentences[0])
+      assert e.overlaps?(@sentences[1])
+    end
+  end
+end

data/test/rbbt/segment/test_range_index.rb ADDED

@@ -0,0 +1,42 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
+require 'rbbt/document'
+require 'rbbt/document/corpus'
+require 'rbbt/segment'
+require 'rbbt/segment/range_index'
+class TestRangeIndex < Test::Unit::TestCase
+  def test_segment_index
+    text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
+    Document.setup(text, "TEST", "test_doc1", nil)
+    corpus = Document::Corpus.setup({})
+    corpus.add_document(text)
+    gene1 = "TP53"
+    gene1.extend Segment
+    gene1.offset = text.index gene1
+    gene1.docid = text.docid
+    gene2 = "CDK5R1"
+    gene2.extend Segment
+    gene2.offset = text.index gene2
+    gene2.docid = text.docid
+    gene3 = "TP53 gene"
+    gene3.extend Segment
+    gene3.offset = text.index gene1
+    gene3.docid = text.docid
+    index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus)
+    assert_equal "CDK5R1", index[gene2.offset + 1].segment.first
+    TmpFile.with_file do |fwt|
+      index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
+      assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
+      index = Segment::RangeIndex.index([gene1, gene2, gene3], corpus, fwt)
+      assert_equal %w(CDK5R1), index[gene2.offset + 1].segment
+    end
+  end
+end

data/test/rbbt/{text/segment → segment}/test_transformed.rb RENAMED

@@ -1,10 +1,21 @@
-require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
-require 'rbbt/text/segment/transformed'
-require 'rbbt/text/segment/named_entity'
+require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
+require 'rbbt/segment/transformed'
+require 'rbbt/segment/named_entity'
 require 'rexml/document'
-class TestClass < Test::Unit::TestCase
-  def test_sort
+class TestTransformed < Test::Unit::TestCase
+  def setup
+    @text = <<-EOF
+More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
+    EOF
+    @entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].collect do |literal|
+      NamedEntity.setup(literal, :offset => @text.index(literal))
+    end
+  end
+  def test_transform
     text = <<-EOF
 More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
     EOF
@@ -13,52 +24,25 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
       NamedEntity.setup(literal, :offset => text.index(literal))
     end
-    Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
-      assert text.include? "such as [IL-2]"
-    end
+    Transformed.transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" })
+    assert text.include? "such as [IL-2]"
   end
-  def ___test_transform
-    a = "This sentence mentions the TP53 gene and the CDK5 protein"
-    original = a.dup
-    gene1 = "TP53"
-    gene1.extend Segment
-    gene1.offset = a.index gene1
-    gene2 = "CDK5"
-    gene2.extend Segment
-    gene2.offset = a.index gene2
-    assert_equal gene1, a[gene1.range]
-    assert_equal gene2, a[gene2.range]
-    c = a.dup
-    c[gene2.range] = "GN"
-    assert_equal c, Transformed.transform(a,[gene2], "GN")
-    c[gene1.range] = "GN"
-    assert_equal c, Transformed.transform(a,[gene1], "GN")
-    iii a.transformation_offset_differences
-    raise
-    assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
-    assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
-    gene3 = "GN gene"
-    gene3.extend Segment
-    gene3.offset = a.index gene3
-    assert_equal gene3, a[gene3.range]
+  def test_with_transform
+    text = <<-EOF
+More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
+    EOF
-    a.restore([gene3])
-    assert_equal original, a
-    assert_equal "TP53 gene", a[gene3.range]
+    entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].reverse.collect do |literal|
+      NamedEntity.setup(literal, :offset => text.index(literal))
+    end
+    Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
+      assert text.include? "such as [IL-2]"
+    end
   end
-  def test_with_transform
+  def test_with_transform_2
     a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
     original = a.dup
@@ -117,18 +101,47 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
     assert_equal "CDK5R1 protein", exp2
   end
+  def test_with_transform_sentences
+    a = "This first sentence mentions Bread. This sentence mentions the TP53 gene and the CDK5R1 protein"
+    original = a.dup
+    gene1 = "TP53"
+    gene1.extend NamedEntity
+    gene1.offset = a.index gene1
+    gene2 = "CDK5R1"
+    gene2.extend NamedEntity
+    gene2.offset = a.index gene2
+    bread = "Bread"
+    bread.extend NamedEntity
+    bread.offset = a.index bread
+    sentences = Segment.align(a, a.split(". "))
+    Transformed.with_transform(sentences[1], [gene1, gene2, bread], "GN") do
+      assert sentences[1].include?("GN gene and the GN protein")
+    end
+    Transformed.with_transform(sentences[0], [gene1, gene2, bread], "BR") do
+      assert sentences[0].include?("first sentence mentions BR")
+    end
+  end
   def test_html
     a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
     gene1 = "TP53"
     gene1.extend NamedEntity
     gene1.offset = a.index gene1
-    gene1.type = "Gene"
+    gene1.entity_type = "Gene"
     gene2 = "CDK5R1"
     gene2.extend NamedEntity
     gene2.offset = a.index gene2
-    gene2.type = "Protein"
+    gene2.entity_type = "Protein"
     Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
       assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -143,13 +156,13 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
     gene1.extend NamedEntity
     gene1.offset = a.index gene1
     gene1.offset += 10
-    gene1.type = "Gene"
+    gene1.entity_type = "Gene"
     gene2 = "CDK5R1"
     gene2.extend NamedEntity
     gene2.offset = a.index gene2
     gene2.offset += 10
-    gene2.type = "Protein"
+    gene2.entity_type = "Protein"
     Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
       assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
@@ -162,12 +175,12 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
     gene1 = "TP53"
     gene1.extend NamedEntity
     gene1.offset = a.index gene1
-    gene1.type = "Gene"
+    gene1.entity_type = "Gene"
     gene2 = "TP53 gene"
     gene2.extend NamedEntity
     gene2.offset = a.index gene2
-    gene2.type = "Expanded Gene"
+    gene2.entity_type = "Expanded Gene"
     assert_equal [gene1], Segment.overlaps(Segment.sort([gene1,gene2]))
@@ -379,5 +392,46 @@ This is another sentence. Among the nonstructural proteins, the leader protein (
       end
     end
   end
+  def ___test_transform
+    a = "This sentence mentions the TP53 gene and the CDK5 protein"
+    original = a.dup
+    gene1 = "TP53"
+    gene1.extend Segment
+    gene1.offset = a.index gene1
+    gene2 = "CDK5"
+    gene2.extend Segment
+    gene2.offset = a.index gene2
+    assert_equal gene1, a[gene1.range]
+    assert_equal gene2, a[gene2.range]
+    c = a.dup
+    c[gene2.range] = "GN"
+    assert_equal c, Transformed.transform(a,[gene2], "GN")
+    c[gene1.range] = "GN"
+    assert_equal c, Transformed.transform(a,[gene1], "GN")
+    iii a.transformation_offset_differences
+    raise
+    assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
+    assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
+    gene3 = "GN gene"
+    gene3.extend Segment
+    gene3.offset = a.index gene3
+    assert_equal gene3, a[gene3.range]
+    a.restore([gene3])
+    assert_equal original, a
+    assert_equal "TP53 gene", a[gene3.range]
+  end
 end