RubyGems - rbbt-text - Versions diffs - 1.3.4 → 1.3.7 - Mend

rbbt-text 1.3.4 → 1.3.7

Files changed (35) hide show

checksums.yaml +4 -4
data/LICENSE +20 -0
data/lib/rbbt/document/annotation.rb +2 -2
data/lib/rbbt/document/corpus/pubmed.rb +14 -5
data/lib/rbbt/document/corpus.rb +10 -7
data/lib/rbbt/document.rb +7 -3
data/lib/rbbt/ner/abner.rb +3 -2
data/lib/rbbt/ner/banner.rb +3 -1
data/lib/rbbt/ner/brat.rb +1 -1
data/lib/rbbt/ner/linnaeus.rb +2 -1
data/lib/rbbt/ner/oscar3.rb +0 -1
data/lib/rbbt/ner/oscar4.rb +0 -1
data/lib/rbbt/ner/rner.rb +229 -0
data/lib/rbbt/ner/rnorm/tokens.rb +3 -1
data/lib/rbbt/ner/rnorm.rb +5 -1
data/lib/rbbt/ner/token_trieNER.rb +2 -1
data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +1 -1
data/lib/rbbt/nlp/spaCy.rb +158 -15
data/lib/rbbt/relationship.rb +24 -0
data/lib/rbbt/segment/named_entity.rb +4 -0
data/lib/rbbt/segment/range_index.rb +1 -1
data/lib/rbbt/segment/transformed.rb +9 -1
data/lib/rbbt/segment.rb +3 -0
data/share/install/software/OpenNLP +3 -8
data/share/rner/config.rb +51 -0
data/test/rbbt/document/corpus/test_pubmed.rb +1 -1
data/test/rbbt/document/test_annotation.rb +10 -1
data/test/rbbt/document/test_corpus.rb +14 -0
data/test/rbbt/ner/rnorm/test_tokens.rb +11 -0
data/test/rbbt/ner/test_rner.rb +132 -0
data/test/rbbt/ner/test_rnorm.rb +5 -0
data/test/rbbt/segment/test_named_entity.rb +2 -1
data/test/rbbt/segment/test_transformed.rb +13 -30
data/test/test_spaCy.rb +113 -1
metadata +13 -18

data/lib/rbbt/nlp/spaCy.rb CHANGED Viewed

@@ -2,30 +2,55 @@ require 'rbbt/segment'
 require 'rbbt/document'
 require 'rbbt/segment/annotation'
 require 'rbbt/util/python'
+require 'rbbt/network/paths'
 module SpaCy
-  PROPERTIES = %w(lemma_ is_punct is_space shape_ pos_ tag_)
+  TOKEN_PROPERTIES = %w(lemma_ is_punct is_space shape_ pos_ tag_)
+  CHUNK_PROPERTIES = %w(lemma_)
-  def self.tokens(text, lang = 'en')
+  def self.nlp(lang = 'en_core_web_md')
+    @@nlp ||= {}
+    @@nlp[lang] ||= RbbtPython.run :spacy do
+      spacy.load(lang)
+    end
+  end
+  def self.tokens(text, lang = 'en_core_web_sm')
     tokens = []
-    RbbtPython.run 'spacy' do
-      nlp = spacy.load(lang)
-      doc = nlp.call(text)
-      doc.__len__.times do |i|
-        tokens << doc.__getitem__(i)
-      end
+    nlp = nlp(lang)
+    doc = nlp.call(text)
+    doc.__len__.times do |i|
+      tokens << doc.__getitem__(i)
+    end
+    tokens
+  end
+  def self.chunks(text, lang = 'en_core_web_sm')
+    tokens = []
+    nlp = nlp(lang)
+    doc = nlp.call(text)
+    chunks = doc.noun_chunks.__iter__
+    RbbtPython.iterate chunks do |item|
+      tokens << item
     end
     tokens
   end
-  def self.segments(text, lang = 'en')
-    docid = text.docid if Document === text
+  def self.segments(text, lang = 'en_core_web_sm')
+    docid = text.docid if Document === text
     corpus = text.corpus if Document === text
     tokens = self.tokens(text, lang).collect do |token|
       info = {}
-      PROPERTIES.each do |p|
+      TOKEN_PROPERTIES.each do |p|
         info[p] = token.instance_eval(p.to_s)
       end
       info[:type] = "SpaCy"
@@ -35,7 +60,120 @@ module SpaCy
       info[:corpus] = corpus if corpus
       SpaCyToken.setup(token.text, info)
     end
-    SpaCyToken.setup(tokens, :corpus => corpus)
+    tokens
+  end
+  def self.chunk_segments(text, lang = 'en_core_web_sm')
+    docid = text.docid if Document === text
+    corpus = text.corpus if Document === text
+    chunks = self.chunks(text, lang).collect do |chunk|
+      info = {}
+      CHUNK_PROPERTIES.each do |p|
+        info[p] = chunk.instance_eval(p.to_s)
+      end
+      start = eend =  nil
+      deps = []
+      RbbtPython.iterate chunk.__iter__ do |token|
+        start = token.idx if start.nil?
+        eend = start + chunk.text.length if eend.nil?
+        deps << token.idx.to_s + ":" + token.dep_ + "->" + token.head.idx.to_s if token.head.idx < start || token.head.idx > eend
+      end
+      info[:type] = "SpaCy"
+      info[:offset] = chunk.__iter__.__next__.idx
+      info[:dep] = deps * ";"
+      info[:docid] = docid if docid
+      info[:corpus] = corpus if corpus
+      SpaCySpan.setup(chunk.text, info)
+    end
+    chunks
+  end
+  def self.dep_graph(text, reverse = false, lang = 'en_core_web_md')
+    tokens = self.segments(text, lang)
+    index = Segment.index(tokens)
+    associations = {}
+    tokens.each do |token|
+      type, target_pos = token.dep.split("->")
+      target_tokens = index[target_pos.to_i]
+      associations[token.segid] = target_tokens
+    end
+    if reverse
+      old = associations.dup
+      old.each do |s,ts|
+        ts.each do |t|
+          associations[t] ||= []
+          associations[t] += [s] unless associations[t].include?(s)
+        end
+      end
+    end
+    associations
+  end
+  def self.chunk_dep_graph(text, reverse = false, lang = 'en_core_web_md')
+    associations = dep_graph(text, false, lang)
+    chunks = self.chunk_segments(text, lang)
+    tokens = self.segments(text, lang)
+    index = Segment.index(tokens + chunks)
+    chunks.each do |chunk|
+      target_token_ids = chunk.dep.split(";").collect do|dep|
+        type, target_pos = dep.split("->")
+        index[target_pos.to_i]
+      end.flatten
+      target_tokens = target_token_ids.collect do |target_token_id|
+        range = Range.new(*target_token_id.split(":").last.split("..").map(&:to_i))
+        range.collect do |pos|
+          index[pos]
+        end.uniq
+      end.flatten
+      associations[chunk.segid] = target_tokens
+    end
+    if reverse
+      old = associations.dup
+      old.each do |s,ts|
+        ts.each do |t|
+          associations[t] ||= []
+          associations[t] += [s] unless associations[t].include?(s)
+        end
+      end
+    end
+    associations
+  end
+  def self.paths(text, source, target, reverse = true, lang = 'en_core_web_md')
+    graph = SpaCy.chunk_dep_graph(text, reverse, lang)
+    chunk_index = Segment.index(SpaCy.chunk_segments(text, lang))
+    source_id = chunk_index[source.offset].first || source.segid
+    target_id = chunk_index[target.offset].first || target.segid
+    path = Paths.dijkstra(graph, source_id, [target_id])
+    return nil if path.nil?
+    path.reverse
+  end
+  def self.config(base, target = nil)
+    TmpFile.with_file(base) do |baseconfig|
+      if target
+        CMD.cmd(:spacy, "init fill-config #{baseconfig} #{target}")
+      else
+        TmpFile.with_file do |tmptarget|
+          CMD.cmd(:spacy, "init fill-config #{baseconfig} #{tmptarget}")
+          Open.read(targetconfig)
+        end
+      end
+    end
   end
 end
@@ -43,10 +181,15 @@ module SpaCyToken
   extend Entity
   include SegmentAnnotation
-  self.annotation *SpaCy::PROPERTIES
+  self.annotation *SpaCy::TOKEN_PROPERTIES
   self.annotation :dep
 end
-if __FILE__ == $0
-  ppp Annotated.tsv(SpaCy.segments("I tell a story"), :all)
+module SpaCySpan
+  extend Entity
+  include SegmentAnnotation
+  self.annotation *SpaCy::CHUNK_PROPERTIES
+  self.annotation :dep
 end

data/lib/rbbt/relationship.rb ADDED Viewed

@@ -0,0 +1,24 @@
+require 'rbbt/segment'
+module Relationship
+  extend Annotation
+  self.annotation :segment
+  self.annotation :terms
+  self.annotation :type
+  def text
+    if segment
+      segment
+    else
+      type + ": " + terms * ", "
+    end
+  end
+  def html
+    text = <<-EOF
+<span class='Relationship'\
+>#{ self.text }</span>
+    EOF
+    text.chomp
+  end
+end

data/lib/rbbt/segment/named_entity.rb CHANGED Viewed

@@ -8,6 +8,10 @@ module NamedEntity
   self.annotation :entity_type, :code, :score
+  def entity_type
+    annotation_values[:entity_type] || annotation_values[:type]
+  end
   def report
     <<-EOF
 String: #{ self }

data/lib/rbbt/segment/range_index.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Segment::RangeIndex
     SegID.setup(res, :corpus => corpus)
   end
-  def self.index(segments, corpus, persist_file = :memory)
+  def self.index(segments, corpus = nil, persist_file = :memory)
     segments = segments.values.flatten if Hash === segments
     annotation_index =

data/lib/rbbt/segment/transformed.rb CHANGED Viewed

@@ -70,7 +70,15 @@ module Transformed
     orig_length = self.length
     offset = self.respond_to?(:offset) ? self.offset.to_i : 0
-    segments = segments.select{|s| s.offset.to_i >= offset && s.offset.to_i <= offset + self.length - 1 }
+    segments = segments.select do |s|
+      shift = shift s.range
+      s_offset = s.offset.to_i
+      s_offset += shift.first if shift
+      s_offset >= offset &&
+        s_offset <= offset + self.length - 1
+    end
     Segment.clean_sort(segments).each do |segment|
       next if segment.offset.nil?

data/lib/rbbt/segment.rb CHANGED Viewed

@@ -49,10 +49,13 @@ module Segment
     length
   end
   def eend
     offset.to_i + length - 1
   end
+  alias end eend
   def range
     (offset.to_i..eend)
   end

data/share/install/software/OpenNLP CHANGED Viewed

@@ -1,12 +1,7 @@
 #!/bin/bash
 name='OpenNLP'
-url="http://apache.rediris.es/opennlp/opennlp-1.9.2/apache-opennlp-1.9.2-bin.tar.gz"
+url="http://apache.rediris.es/opennlp/opennlp-1.9.4/apache-opennlp-1.9.4-bin.tar.gz"
-get_src "$name" "$url"
-move_opt "$name"
-ln -sf  "$OPT_DIR/$name/lib/"*.jar  "$OPT_JAR_DIR/"
-clean_build
+install_src $name $url
+(cd $OPT_DIR/jars; ln -s $OPT_DIR/$name/lib/*.jar .)

data/share/rner/config.rb ADDED Viewed

@@ -0,0 +1,51 @@
+isLetters     /^[A-Z]+$/i
+isUpper       /^[A-Z]+$/
+isLower       /^[a-z]+$/
+isDigits      /^[0-9]+$/i
+isRoman       /^[IVX]+$/
+isGreek       /^(?:alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)$/i
+isPunctuation /^[,.;]$/
+isDelim       /^[\/()\[\]{}\-]$/
+isNonWord     /^[^\w]+$/
+isConjunction /^and|or|&|,$/
+hasLetters    /[A-Z]/i
+hasUpper      /.[A-Z]/
+hasLower      /[a-z]/
+hasDigits     /[0-9]/i
+hasGreek      /(?:alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)/i
+hasPunctuation /[,.;]/
+hasDelim      /[\/()\[\]{}\-]/
+hasNonWord    /[^\w]/
+caspMix       /[a-z].[A-Z]/
+keywords      /(?:protein|gene|domain|ase)s?$/
+hasSuffix     /[a-z][A-Z0-9]$/
+numLetters    do |w| w.scan(/[A-Z]/i).length end
+numDigits     do |w| w.scan(/[0-9]/).length end
+#
+prefix_3      /^(...)/
+prefix_4      /^(....)/
+suffix_3      /(...)$/
+suffix_4      /(....)$/
+token1        do |w|
+                 w.sub(/[A-Z]/,'A').
+                   sub(/[a-z]/,'a').
+                   sub(/[0-9]/,'0').
+                   sub(/[^0-9a-z]/i,'x')
+              end
+token2        do  |w|
+                 w.sub(/[A-Z]+/,'A').
+                   sub(/[a-z]+/,'a').
+                   sub(/[0-9]+/,'0').
+                   sub(/[^0-9a-z]+/i,'x')
+               end
+token3         do |w| w.downcase end
+special        do |w| w.is_special? end
+context   %w(special token2 isPunctuation isDelim)
+window     %w(1 2 3 -1 -2 -3)
+#direction :reverse

data/test/rbbt/document/corpus/test_pubmed.rb CHANGED Viewed

@@ -7,7 +7,7 @@ class TestCorpusPubmed < Test::Unit::TestCase
   def test_add_pmid
     corpus = Document::Corpus.setup({})
-    document = corpus.add_pmid("32299157", :abstract).first
+    document = corpus.add_pmid("33359141", :abstract).first
     title = document.to(:title)
     assert title.include?("COVID-19")
   end

data/test/rbbt/document/test_annotation.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require 'rbbt/document/corpus'
 require 'rbbt/segment'
 require 'rbbt/document/annotation'
 require 'rbbt/segment/named_entity'
+require 'rbbt/ner/abner'
 class TestAnnotation < Test::Unit::TestCase
   class CalledOnce < Exception; end
@@ -28,6 +29,12 @@ class TestAnnotation < Test::Unit::TestCase
       self.split(" ").collect{|e| NamedEntity.setup(e, :code => Misc.digest(e)) }
     end
+    Document.define :abner do
+      $called_once = true
+      Abner.new.match(self)
+    end
     Document.persist :ner
   end
@@ -133,7 +140,9 @@ class TestAnnotation < Test::Unit::TestCase
     text.ner
     assert ! $called_once
+    assert_equal text.abner.first.docid, text.docid
     assert  text.ner.first.segid.include?("TEST:")
   end
 end

data/test/rbbt/document/test_corpus.rb CHANGED Viewed

@@ -29,5 +29,19 @@ class TestDocumentCorpus < Test::Unit::TestCase
       assert corpus.docids("TEST:").include?(text.docid)
     end
   end
+  def test_load
+    text = "This is a document"
+    Document.setup(text, "TEST", "test_doc1", nil)
+    TmpFile.with_file do |path|
+      corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
+      corpus.extend Document::Corpus
+      corpus.add_document(text)
+      assert corpus.docids("TEST:").include?(text.docid)
+    end
+  end
 end

data/test/rbbt/ner/rnorm/test_tokens.rb ADDED Viewed

@@ -0,0 +1,11 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
+require 'rbbt/ner/rnorm'
+class TestRNorm < Test::Unit::TestCase
+  def test_evaluate
+    t = Tokenizer.new
+    assert t.evaluate("PDGFRA","PDGFRalpha") > 0
+    iii t.evaluate("JUNB","JunB")
+  end
+end

data/test/rbbt/ner/test_rner.rb ADDED Viewed

@@ -0,0 +1,132 @@
+require File.dirname(__FILE__) + '/../../test_helper'
+require 'rbbt'
+require 'rbbt/ner/rner'
+require 'test/unit'
+class TestRNer < Test::Unit::TestCase
+  def setup
+    @parser = NERFeatures.new() do
+      isLetters     /^[A-Z]+$/i
+      context prefix_3      /^(...)/
+      downcase do |w| w.downcase end
+      context %w(downcase)
+    end
+  end
+  def test_config
+    config = <<-EOC
+      isLetters     /^[A-Z]+$/i
+      context prefix_3      /^(...)/
+      downcase do |w| w.downcase end
+      context %w(downcase)
+    EOC
+    assert_equal config.strip, @parser.config.strip
+  end
+  def test_reverse
+    assert_equal("protein P53", NERFeatures.reverse("P53 protein"))
+    assert_equal(
+       ". LH of assay - radioimmuno serum the with compared was LH urinary for ) GONAVIS - HI ( test hemagglutination direct new A",
+     NERFeatures.reverse(
+       "A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum\n radioimmuno-assay of LH."
+      ))
+  end
+  def test_features
+    assert_equal  @parser.features("abCdE"), ["abCdE",true,'abC','abcde']
+  end
+  def test_template
+    template =<<-EOT
+UisLetters: %x[0,1]
+Uprefix_3: %x[0,2]
+Uprefix_3#1: %x[1,2]
+Uprefix_3#-1: %x[-1,2]
+Udowncase: %x[0,3]
+Udowncase#1: %x[1,3]
+Udowncase#-1: %x[-1,3]
+B
+    EOT
+    assert(@parser.template == template)
+  end
+  def test_tokens
+    assert( NERFeatures.tokens("A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum\n radioimmuno-assay of LH.")==
+           ["A", "new", "direct", "hemagglutination", "test", "(", "HI", "-", "GONAVIS", ")", "for", "urinary", "LH", "was", "compared", "with", "the", "serum", "radioimmuno", "-", "assay", "of", "LH", "."])
+  end
+  def test_text_features
+    assert(@parser.text_features("abCdE 1234") == [["abCdE",true, "abC", "abcde"], ["1234",false, "123", "1234"]])
+    assert(@parser.text_features("abCdE 1234",true) == [["abCdE",true, "abC", "abcde",1], ["1234",false, "123", "1234",2]])
+    assert(@parser.text_features("abCdE 1234",false) == [["abCdE",true, "abC", "abcde",0], ["1234",false, "123", "1234",0]])
+  end
+  def test_tagged_features
+    assert_equal(
+      [["phosphorilation",true, "pho", "phosphorilation", 0],
+        ["of",true, false, "of", 0],
+        ["GENE1",false, "GEN", "gene1", 1],
+        [".", false, false, ".", 0]],
+      @parser.tagged_features("phosphorilation of GENE1.",['GENE1']))
+      assert_equal(
+        [["GENE1",false, "GEN", "gene1", 1],
+          ["phosphorilation",true, "pho", "phosphorilation", 0]],
+      @parser.tagged_features("GENE1 phosphorilation",['GENE1']))
+    assert_equal(
+           [["phosphorilation",true, "pho", "phosphorilation", 0],
+            ["of",true, false, "of", 0],
+            ["GENE",true, "GEN", "gene", 1],
+            ["1",false, false, "1", 2],
+            [".", false, false, ".", 0]],
+      @parser.tagged_features("phosphorilation of GENE 1.",['GENE 1']))
+  end
+  def test_tagged_features_reverse
+    @parser.reverse = true
+    assert_equal(
+      [
+        ["GENE1",false, "GEN", "gene1", 1],
+        ["of",true, false, "of", 0],
+        ["phosphorilation",true, "pho", "phosphorilation", 0]
+    ],
+    @parser.tagged_features("phosphorilation of GENE1",['GENE1']))
+    assert_equal(
+          [
+            [".", false, false, ".", 0],
+            ["1",false, false, "1", 1],
+            ["GENE",true, "GEN", "gene", 2],
+            ["of",true, false, "of", 0],
+            ["phosphorilation",true, "pho", "phosphorilation", 0]
+        ],
+    @parser.tagged_features("phosphorilation of GENE 1.",['GENE 1']))
+  end
+  def test_default_config
+    require 'rbbt/bow/misc'
+    text =<<-EOF
+This text explains how MDM2 interacts with TP53.
+    EOF
+    @parser = NERFeatures.new Rbbt.share.rner["config.rb"].find
+    features = @parser.tagged_features text, %w(TP53 MDM2)
+    assert features.first.first == "This"
+  end
+  def __test_CRFPP_install
+    assert(require File.join(Rbbt.datadir, 'third_party/crf++/ruby/CRFPP'))
+  end
+end

data/test/rbbt/ner/test_rnorm.rb CHANGED Viewed

@@ -43,4 +43,9 @@ S000000376	AAA	GENE1	DDD
   def test_order
     assert_equal(["S000000375"], @norm.resolve("GENE1"))
   end
+  def test_token_evaluate
+    iii @norm.token_evaluate("PDGFRA","PDGFRalpha")
+  end
 end

data/test/rbbt/segment/test_named_entity.rb CHANGED Viewed

@@ -24,7 +24,8 @@ class TestClass < Test::Unit::TestCase
   def test_tsv
     a = "test"
-    NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
+    NamedEntity.setup a, 10, "DocID", "TYPE", "CODE", "SCORE"
+    ppp Annotated.tsv([a,a])
     assert Annotated.tsv([a]).fields.include? "code"
     assert Annotated.tsv([a], nil).fields.include? "code"
     assert Annotated.tsv([a], :all).fields.include? "code"

data/test/rbbt/segment/test_transformed.rb CHANGED Viewed

@@ -144,7 +144,7 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
     gene2.entity_type = "Protein"
     Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
-      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
+      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene' title='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein' title='Protein'>CDK5R1</span> protein", a
     end
   end
@@ -165,7 +165,7 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
     gene2.entity_type = "Protein"
     Transformed.with_transform(a, [gene1,gene2], Proc.new{|e| e.html}) do
-      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein'>CDK5R1</span> protein", a
+      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene' title='Gene'>TP53</span> gene and the <span class='Entity' attr-entity-type='Protein' title='Protein'>CDK5R1</span> protein", a
     end
   end
@@ -185,9 +185,9 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
     assert_equal [gene1], Segment.overlaps(Segment.sort([gene1,gene2]))
     Transformed.with_transform(a, [gene1], Proc.new{|e| e.html}) do
-      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene'>TP53</span> gene and the CDK5R1 protein", a
+      assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Gene' title='Gene'>TP53</span> gene and the CDK5R1 protein", a
       Transformed.with_transform(a, [gene2], Proc.new{|e| e.html}) do
-        assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Expanded Gene'><span class='Entity' attr-entity-type='Gene'>TP53</span> gene</span> and the CDK5R1 protein", a
+        assert_equal "This sentence mentions the <span class='Entity' attr-entity-type='Expanded Gene' title='Expanded Gene'><span class='Entity' attr-entity-type='Gene' title='Gene'>TP53</span> gene</span> and the CDK5R1 protein", a
       end
     end
   end
@@ -393,43 +393,26 @@ This is another sentence. Among the nonstructural proteins, the leader protein (
     end
   end
-  def ___test_transform
-    a = "This sentence mentions the TP53 gene and the CDK5 protein"
+  def test_transform_sorter_end
+    a = "The transcription factors farnesoid X receptor, small heterodimer partner, liver receptor homolog-1, and liver X receptor comprise the signaling cascade network that regulates the expression and secretion of apoM."
     original = a.dup
-    gene1 = "TP53"
+    gene1 = "liver receptor homolog-1"
     gene1.extend Segment
     gene1.offset = a.index gene1
-    gene2 = "CDK5"
+    gene2 = "apoM"
     gene2.extend Segment
     gene2.offset = a.index gene2
     assert_equal gene1, a[gene1.range]
     assert_equal gene2, a[gene2.range]
-    c = a.dup
-    c[gene2.range] = "GN"
-    assert_equal c, Transformed.transform(a,[gene2], "GN")
-    c[gene1.range] = "GN"
-    assert_equal c, Transformed.transform(a,[gene1], "GN")
-    iii a.transformation_offset_differences
-    raise
-    assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
-    assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
-    gene3 = "GN gene"
-    gene3.extend Segment
-    gene3.offset = a.index gene3
-    assert_equal gene3, a[gene3.range]
-    a.restore([gene3])
-    assert_equal original, a
-    assert_equal "TP53 gene", a[gene3.range]
+    Transformed.with_transform(a, [gene1], "[TF]") do
+      Transformed.with_transform(a, [gene2], "[TG]") do
+        assert_equal "The transcription factors farnesoid X receptor, small heterodimer partner, [TF], and liver X receptor comprise the signaling cascade network that regulates the expression and secretion of [TG].", a
+      end
+    end
   end