RubyGems - rbbt-text - Versions diffs - 1.2.0 → 1.3.4 - Mend

rbbt-text 1.2.0 → 1.3.4

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (76)

checksums.yaml +4 -4
data/lib/rbbt/bow/bow.rb +5 -2
data/lib/rbbt/bow/dictionary.rb +27 -23
data/lib/rbbt/document.rb +55 -0
data/lib/rbbt/document/annotation.rb +45 -0
data/lib/rbbt/document/corpus.rb +63 -0
data/lib/rbbt/document/corpus/pubmed.rb +33 -0
data/lib/rbbt/ner/NER.rb +3 -3
data/lib/rbbt/ner/abner.rb +1 -1
data/lib/rbbt/ner/banner.rb +1 -1
data/lib/rbbt/ner/brat.rb +1 -1
data/lib/rbbt/ner/chemical_tagger.rb +1 -2
data/lib/rbbt/ner/g_norm_plus.rb +26 -3
data/lib/rbbt/ner/linnaeus.rb +3 -3
data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
data/lib/rbbt/ner/oscar3.rb +1 -2
data/lib/rbbt/ner/oscar4.rb +3 -3
data/lib/rbbt/ner/patterns.rb +5 -5
data/lib/rbbt/ner/regexpNER.rb +1 -2
data/lib/rbbt/ner/token_trieNER.rb +35 -22
data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
data/lib/rbbt/nlp/nlp.rb +5 -5
data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
data/lib/rbbt/nlp/spaCy.rb +52 -0
data/lib/rbbt/segment.rb +179 -0
data/lib/rbbt/segment/annotation.rb +58 -0
data/lib/rbbt/segment/encoding.rb +18 -0
data/lib/rbbt/{text/segment → segment}/named_entity.rb +14 -11
data/lib/rbbt/segment/overlaps.rb +63 -0
data/lib/rbbt/segment/range_index.rb +35 -0
data/lib/rbbt/segment/relationship.rb +7 -0
data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
data/lib/rbbt/segment/token.rb +23 -0
data/lib/rbbt/{text/segment → segment}/transformed.rb +12 -10
data/lib/rbbt/segment/tsv.rb +41 -0
data/share/install/software/Linnaeus +1 -1
data/share/install/software/OpenNLP +1 -1
data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
data/test/rbbt/document/test_annotation.rb +140 -0
data/test/rbbt/document/test_corpus.rb +33 -0
data/test/rbbt/ner/test_finder.rb +3 -3
data/test/rbbt/ner/test_g_norm_plus.rb +20 -2
data/test/rbbt/ner/test_patterns.rb +9 -9
data/test/rbbt/ner/test_regexpNER.rb +14 -14
data/test/rbbt/ner/test_rnorm.rb +3 -4
data/test/rbbt/ner/test_token_trieNER.rb +1 -0
data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
data/test/rbbt/segment/test_annotation.rb +39 -0
data/test/rbbt/segment/test_corpus.rb +36 -0
data/test/rbbt/segment/test_encoding.rb +24 -0
data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
data/test/rbbt/segment/test_overlaps.rb +69 -0
data/test/rbbt/segment/test_range_index.rb +42 -0
data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
data/test/rbbt/test_document.rb +14 -0
data/test/rbbt/test_segment.rb +182 -0
data/test/test_helper.rb +5 -3
data/test/test_spaCy.rb +32 -0
metadata +44 -32
data/lib/rbbt/text/corpus.rb +0 -106
data/lib/rbbt/text/corpus/document.rb +0 -383
data/lib/rbbt/text/corpus/document_repo.rb +0 -68
data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
data/lib/rbbt/text/document.rb +0 -39
data/lib/rbbt/text/segment.rb +0 -363
data/lib/rbbt/text/segment/docid.rb +0 -46
data/lib/rbbt/text/segment/relationship.rb +0 -24
data/lib/rbbt/text/segment/token.rb +0 -49
data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
data/test/rbbt/text/corpus/test_document.rb +0 -82
data/test/rbbt/text/segment/test_relationship.rb +0 -0
data/test/rbbt/text/segment/test_segmented.rb +0 -23
data/test/rbbt/text/test_corpus.rb +0 -34
data/test/rbbt/text/test_document.rb +0 -58
data/test/rbbt/text/test_segment.rb +0 -100

data/test/test_helper.rb CHANGED

@@ -6,7 +6,7 @@ require 'rbbt'
 require 'rbbt/persist'
 require 'rbbt/util/tmpfile'
 require 'rbbt/util/log'
-require 'rbbt/text/corpus'
+#require 'rbbt/text/corpus'
 class Test::Unit::TestCase
   def get_test_datafile(file)
@@ -22,8 +22,10 @@ class Test::Unit::TestCase
     FileUtils.rm_rf Rbbt.tmp.test.find :user
     Persist::CONNECTIONS.values.each do |c| c.close end
     Persist::CONNECTIONS.clear
-    Corpus::DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
-    Corpus::DocumentRepo::TC_CONNECTIONS.clear
+    if defined? Corpus
+      Corpus::DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
+      Corpus::DocumentRepo::TC_CONNECTIONS.clear
+    end
   end
 end

data/test/test_spaCy.rb ADDED

@@ -0,0 +1,32 @@
+require File.join(File.expand_path(File.dirname(__FILE__)), '', 'test_helper.rb')
+require 'rbbt/nlp/spaCy'
+require 'rbbt/document/corpus'
+class TestSpaCy < Test::Unit::TestCase
+  def _test_tokens
+    text = "I tell a story"
+    tokens = SpaCy.tokens(text)
+    assert_equal 4, tokens.length
+    assert_equal "tell", tokens[1].to_s
+  end
+  def test_segments
+    text = "I tell a story. It's a very good story."
+    corpus = Document::Corpus.setup({})
+    Document.setup(text, "TEST", "test_doc1", "simple_sentence")
+    corpus.add_document text
+    text.corpus = corpus
+    segments = SpaCy.segments(text)
+    segments.each do |segment|
+      assert_equal segment, segment.segid.tap{|e| e.corpus = corpus}.segment
+    end
+  end
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-text
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 1.3.4
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-04-16 00:00:00.000000000 Z
+date: 2020-07-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rbbt-util
@@ -78,6 +78,10 @@ files:
 - lib/rbbt/bow/bow.rb
 - lib/rbbt/bow/dictionary.rb
 - lib/rbbt/bow/misc.rb
+- lib/rbbt/document.rb
+- lib/rbbt/document/annotation.rb
+- lib/rbbt/document/corpus.rb
+- lib/rbbt/document/corpus/pubmed.rb
 - lib/rbbt/ner/NER.rb
 - lib/rbbt/ner/abner.rb
 - lib/rbbt/ner/banner.rb
@@ -98,18 +102,18 @@ files:
 - lib/rbbt/nlp/genia/sentence_splitter.rb
 - lib/rbbt/nlp/nlp.rb
 - lib/rbbt/nlp/open_nlp/sentence_splitter.rb
-- lib/rbbt/text/corpus.rb
-- lib/rbbt/text/corpus/document.rb
-- lib/rbbt/text/corpus/document_repo.rb
-- lib/rbbt/text/corpus/sources/pmid.rb
-- lib/rbbt/text/document.rb
-- lib/rbbt/text/segment.rb
-- lib/rbbt/text/segment/docid.rb
-- lib/rbbt/text/segment/named_entity.rb
-- lib/rbbt/text/segment/relationship.rb
-- lib/rbbt/text/segment/segmented.rb
-- lib/rbbt/text/segment/token.rb
-- lib/rbbt/text/segment/transformed.rb
+- lib/rbbt/nlp/spaCy.rb
+- lib/rbbt/segment.rb
+- lib/rbbt/segment/annotation.rb
+- lib/rbbt/segment/encoding.rb
+- lib/rbbt/segment/named_entity.rb
+- lib/rbbt/segment/overlaps.rb
+- lib/rbbt/segment/range_index.rb
+- lib/rbbt/segment/relationship.rb
+- lib/rbbt/segment/segmented.rb
+- lib/rbbt/segment/token.rb
+- lib/rbbt/segment/transformed.rb
+- lib/rbbt/segment/tsv.rb
 - share/install/software/ABNER
 - share/install/software/BANNER
 - share/install/software/ChemicalTagger
@@ -128,6 +132,9 @@ files:
 - test/rbbt/bow/test_bow.rb
 - test/rbbt/bow/test_dictionary.rb
 - test/rbbt/bow/test_misc.rb
+- test/rbbt/document/corpus/test_pubmed.rb
+- test/rbbt/document/test_annotation.rb
+- test/rbbt/document/test_corpus.rb
 - test/rbbt/entity/test_document.rb
 - test/rbbt/ner/test_NER.rb
 - test/rbbt/ner/test_abner.rb
@@ -146,16 +153,17 @@ files:
 - test/rbbt/nlp/genia/test_sentence_splitter.rb
 - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
 - test/rbbt/nlp/test_nlp.rb
-- test/rbbt/text/corpus/sources/test_pmid.rb
-- test/rbbt/text/corpus/test_document.rb
-- test/rbbt/text/segment/test_named_entity.rb
-- test/rbbt/text/segment/test_relationship.rb
-- test/rbbt/text/segment/test_segmented.rb
-- test/rbbt/text/segment/test_transformed.rb
-- test/rbbt/text/test_corpus.rb
-- test/rbbt/text/test_document.rb
-- test/rbbt/text/test_segment.rb
+- test/rbbt/segment/test_annotation.rb
+- test/rbbt/segment/test_corpus.rb
+- test/rbbt/segment/test_encoding.rb
+- test/rbbt/segment/test_named_entity.rb
+- test/rbbt/segment/test_overlaps.rb
+- test/rbbt/segment/test_range_index.rb
+- test/rbbt/segment/test_transformed.rb
+- test/rbbt/test_document.rb
+- test/rbbt/test_segment.rb
 - test/test_helper.rb
+- test/test_spaCy.rb
 homepage: http://github.com/mikisvaz/rbbt-util
 licenses: []
 metadata: {}
@@ -182,18 +190,13 @@ test_files:
 - test/rbbt/nlp/test_nlp.rb
 - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
 - test/rbbt/nlp/genia/test_sentence_splitter.rb
-- test/rbbt/text/test_document.rb
-- test/rbbt/text/corpus/sources/test_pmid.rb
-- test/rbbt/text/corpus/test_document.rb
-- test/rbbt/text/test_segment.rb
-- test/rbbt/text/test_corpus.rb
-- test/rbbt/text/segment/test_transformed.rb
-- test/rbbt/text/segment/test_relationship.rb
-- test/rbbt/text/segment/test_named_entity.rb
-- test/rbbt/text/segment/test_segmented.rb
 - test/rbbt/bow/test_bow.rb
 - test/rbbt/bow/test_misc.rb
 - test/rbbt/bow/test_dictionary.rb
+- test/rbbt/test_document.rb
+- test/rbbt/document/test_annotation.rb
+- test/rbbt/document/corpus/test_pubmed.rb
+- test/rbbt/document/test_corpus.rb
 - test/rbbt/entity/test_document.rb
 - test/rbbt/ner/test_patterns.rb
 - test/rbbt/ner/test_NER.rb
@@ -209,4 +212,13 @@ test_files:
 - test/rbbt/ner/test_finder.rb
 - test/rbbt/ner/test_linnaeus.rb
 - test/rbbt/ner/test_oscar4.rb
+- test/rbbt/test_segment.rb
+- test/rbbt/segment/test_transformed.rb
+- test/rbbt/segment/test_overlaps.rb
+- test/rbbt/segment/test_annotation.rb
+- test/rbbt/segment/test_named_entity.rb
+- test/rbbt/segment/test_encoding.rb
+- test/rbbt/segment/test_range_index.rb
+- test/rbbt/segment/test_corpus.rb
+- test/test_spaCy.rb
 - test/test_helper.rb

data/lib/rbbt/text/corpus.rb DELETED

@@ -1,106 +0,0 @@
-require 'rbbt/text/corpus/document'
-require 'rbbt/text/corpus/document_repo'
-class Corpus
-  class << self
-    attr_accessor :claims
-    def claim(namespace, &block)
-      @@claims = {}
-      @@claims[namespace] = block
-    end
-  end
-  attr_accessor :corpora_path, :document_repo, :persistence_dir, :global_annotations
-  def initialize(corpora_path = nil)
-    @corpora_path = case
-                   when corpora_path.nil?
-                     Rbbt.corpora
-                   when (not Path === corpora_path)
-                     Path.setup(corpora_path)
-                   else
-                     corpora_path
-                   end
-    @corpora_path = @corpora_path.find
-    @persistence_dir = File.join(@corpora_path, "annotations")
-    Misc.lock(@persistence_dir) do
-      @global_annotations = TSV.setup(Persist.open_tokyocabinet(File.join(@persistence_dir, "global_annotations"), false, :list), :key => "ID", :fields => ["Start", "End", "JSON", "Document ID", "Entity Type"])
-      @global_annotations.unnamed = true
-      @global_annotations.close
-    end
-    Misc.lock(@corpora_path.document_repo) do
-      @document_repo   = DocumentRepo.open_tokyocabinet @corpora_path.document_repo, false
-      @document_repo.close
-    end
- end
-  def persistence_for(docid)
-    File.join(persistence_dir, docid)
-  end
-  def docid(docid)
-    begin
-      if @document_repo.include?(docid)
-        Document.new(persistence_for(docid), docid, @document_repo[docid], @global_annotations, self)
-      else
-        namespace, id, type = docid.split(":")
-        if @@claims.include?(namespace)
-          docid = self.instance_exec id, type, &(@@claims[namespace])
-          docid = docid.first if Array === docid
-          self.docid(docid)
-        else
-          raise "Document '#{ docid }' was not found." unless @document_repo.include? docid
-        end
-      end
-    ensure
-      @document_repo.close
-    end
-  end
-  def document(namespace, id, type, hash)
-    docid = [namespace, id, type, hash] * ":"
-    self.docid(docid)
-  end
-  def add_document(text, namespace = nil, id = nil, type = nil)
-    text = Misc.fixutf8(text)
-    hash = Digest::MD5.hexdigest(text)
-    @document_repo.add(text, namespace, id, type, hash)
-  end
-  def add_docid(text, docid)
-    namespace, id, type, hash = docid.split(":")
-    @document_repo.add(text, namespace, id, type, hash)
-  end
-  def find(namespace=nil, id = nil, type = nil, hash = nil)
-    @document_repo.find(namespace, id, type, hash).collect{|docid|
-      self.docid(docid)
-    }
-  end
-  def find_docid(docid)
-    @document_repo.find_docid(docid).collect{|docid|
-      self.docid(docid)
-    }
-  end
-  def exists?(namespace=nil, id = nil, type = nil, hash = nil)
-    find(namespace, id, type, hash).any?
-  end
-  def [](docid)
-    self.docid(docid)
-  end
-  def include?(id)
-    @document_repo.include? id
-  end
-end

data/lib/rbbt/text/corpus/document.rb DELETED

@@ -1,383 +0,0 @@
-require 'rbbt/text/segment'
-require 'rbbt/text/segment/segmented'
-require 'rbbt/text/segment/docid'
-require 'rbbt/tsv'
-require 'rbbt/resource/path'
-require 'rbbt/persist/tsv'
-require 'rbbt/util/misc'
-require 'rbbt/text/document'
-require 'json'
-class Corpus
-  class Document
-    class MultipleEntity < Exception; end
-    attr_accessor :text, :docid, :namespace, :id, :type, :hash, :segments, :segment_indices, :persist_dir, :global_persistence, :corpus
-    attr_accessor :multiple_result
-    def initialize(persist_dir = nil, docid = nil, text = nil, global_persistence = nil, corpus = nil)
-      @segments = {}
-      @segment_indices = {}
-      @corpus = corpus
-      if not persist_dir.nil?
-        @persist_dir = persist_dir
-        @persist_dir = Path.setup(@persist_dir) if not Path == @persist_dir
-      end
-      @global_persistence = global_persistence
-      if not docid.nil?
-        @docid = docid
-        update_docid
-      end
-      @text = text unless text.nil?
-    end
-    def update_docid
-      @namespace, @id, @type, @hash = docid.split(":", -1)
-    end
-    def docid=(docid)
-      @docid = docid
-      update_docid
-    end
-    def self.define(entity, &block)
-      send :define_method, "produce_#{entity}" do
-        segments = self.instance_exec &block
-        segments.each{|s| s.docid = docid }
-      end
-      self.class_eval <<-EOC, __FILE__, __LINE__ + 1
-        def load_#{entity}(raw = false)
-          return if segments.include? "#{ entity }"
-          if self.respond_to?("load_with_persistence_#{entity}") and not @persist_dir.nil?
-            entities = load_with_persistence_#{entity}(raw)
-          else
-            entities = produce_#{entity}
-          end
-          segments["#{ entity }"] = entities
-        end
-        def #{entity}(raw = false)
-          begin
-            entities = segments["#{ entity }"]
-            if entities.nil?
-              load_#{entity}(raw)
-              entities = segments["#{ entity }"]
-            end
-          end
-          entities
-        end
-        def #{entity}_at(pos, persist = false)
-          segment_index("#{ entity }", persist ? File.join(@persist_dir, 'ranges') : nil)[pos]
-        end
-      EOC
-    end
-    def self.define_multiple(entity, &block)
-      send :define_method, "produce_#{entity}" do
-        if self.multiple_result && self.multiple_result[entity]
-          segments = self.multiple_result[entity]
-          return segments.each{|s| s.docid = docid }
-        end
-        raise MultipleEntity, "Entity #{entity} runs with multiple documents, please prepare beforehand with prepare_multiple: #{self.docid}"
-      end
-      name = "multiple_produce_#{entity}"
-      class << self
-        self
-      end.send :define_method, name, &block
-      self.class_eval <<-EOC, __FILE__, __LINE__ + 1
-        def load_#{entity}(raw = false)
-          return if segments.include? "#{ entity }"
-          if self.respond_to?("load_with_persistence_#{entity}") and not @persist_dir.nil?
-            entities = load_with_persistence_#{entity}(raw)
-          else
-            entities = produce_#{entity}
-          end
-          segments["#{ entity }"] = entities
-        end
-        def #{entity}(raw = false)
-          begin
-            entities = segments["#{ entity }"]
-            if entities.nil?
-              load_#{entity}(raw)
-              entities = segments["#{ entity }"]
-            end
-          end
-          entities
-        end
-        def #{entity}_at(pos, persist = false)
-          segment_index("#{ entity }", persist ? File.join(@persist_dir, 'ranges') : nil)[pos]
-        end
-      EOC
-    end
-    def self.prepare_multiple(docs, entity)
-      missing = []
-      docs.each do |doc|
-        begin
-          doc.send(entity)
-        rescue MultipleEntity
-          missing << doc
-        end
-      end
-      res = self.send("multiple_produce_#{entity.to_s}", missing) if missing.any?
-      case res
-      when Array
-        res.each_with_index do |res,i|
-          missing[i].multiple_result ||= {}
-          missing[i].multiple_result[entity] = res
-        end
-      when Hash
-        res.each do |document,res|
-          case document
-          when Corpus::Document
-            document.multiple_result[entity] = res
-          when String
-            document = missing.select{|d| d.docid == document}.first
-            document.multiple_result[entity] = res
-          end
-        end
-      end
-      missing.each{|doc|
-        doc.send entity
-      }
-    end
-    #{{{ PERSISTENCE
-    TSV_REPOS = {}
-    FIELDS_FOR_ENTITY_PERSISTENCE = {}
-    def self.persist(entity, fields = nil)
-      if not fields.nil?
-        fields = [fields] if not Array === fields
-        fields = fields.collect{|f| f.to_s}
-        FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
-      end
-      self.class_eval <<-EOC, __FILE__, __LINE__
-        def load_with_persistence_#{entity}(raw = false)
-          fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
-          tsv_file = File.join(@persist_dir.find, "#{ entity }")
-          return nil if raw == :check and File.exists? tsv_file
-          annotations = Persist.persist("Entity[#{ entity }]", :tsv, :file => tsv_file) do
-            segments = produce_#{entity}
-            tsv = Segment.tsv(segments, fields)
-          end
-          return annotations if raw
-          annotations.unnamed = true
-          annotations.collect{|id, annotation|
-            Segment.load_tsv_values(text, annotation, annotations.fields)
-          }
-        end
-            EOC
-    end
-    def self.persist_in_tsv(entity, tsv = nil, fields = nil)
-      tsv = TSV.setup(Persist.open_tokyocabinet(tsv, false, :list), :key => "ID", :fields => ["Start", "End", "JSON", "Document ID", "Entity Type"]).tap{|t| t.unnamed = true, t.close} if Path === tsv
-      if ! tsv.nil? && ! tsv.respond_to?(:keys)
-        fields = tsv
-        tsv = nil
-      end
-      TSV_REPOS[entity.to_s] = tsv
-      if ! fields.nil?
-        fields = [fields] if not Array === fields
-        fields = fields.collect{|f| f.to_s}
-        FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields unless fields.nil?
-      end
-      self.class_eval <<-EOC, __FILE__, __LINE__ + 1
-        def load_with_persistence_#{entity}(raw = false)
-          repo = TSV_REPOS["#{ entity }"]
-          if repo.nil?
-            raise "No persistence file or persistence dir for persist_in_tsv" if persist_dir.nil?
-            repo = Persist.open_tokyocabinet(persist_dir.annotations_by_type.find, true, :marshal_tsv)
-          end
-          fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
-          begin
-            if ! repo.include?("#{ entity }")
-              segments = produce_#{entity}
-              repo.write_and_read do
-                repo["#{entity}"] = Segment.tsv(segments, fields) if segments.any?
-              end
-            else
-              if raw == :check
-                repo.close
-                return nil
-              end
-            end
-            annotations = repo["#{entity}"]
-            repo.close
-            return annotations if raw
-            annotations.unnamed = true
-            annotations.collect{|id, annotation|
-              Segment.load_tsv_values(text, annotation, annotations.fields)
-            }
-          ensure
-            repo.close
-          end
-        end
-      EOC
-    end
-    def self.persist_in_global_tsv(entity, tsv = nil, fields = nil, doc_field = nil, entity_field = nil)
-      tsv = TSV.setup(Persist.open_tokyocabinet(tsv, false, :list), :key => "ID", :fields => (fields || ["Start", "End", "JSON", "Document ID", "Entity Type"])).tap{|t| t.unnamed = true, t.close} if Path === tsv
-      doc_field ||= "Document ID"
-      entity_field ||= "Entity Type"
-      TSV_REPOS[entity.to_s] = tsv
-      if not fields.nil?
-        fields = [fields] if not Array === fields
-        fields = fields.collect{|f| f.to_s}
-      else
-        fields = nil
-      end
-      FIELDS_FOR_ENTITY_PERSISTENCE[entity.to_s] = fields
-      self.class_eval <<-EOC, __FILE__, __LINE__ + 1
-        def load_with_persistence_#{entity}(raw = false)
-          fields = FIELDS_FOR_ENTITY_PERSISTENCE["#{ entity }"]
-          data = TSV_REPOS["#{ entity }"] || @global_persistence
-          begin
-            if data.respond_to? :persistence_path and String === data.persistence_path
-              data.filter(data.persistence_path + '.filters')
-            end
-            keys = data.read_and_close do
-              fields = data.fields if fields.nil? and data.respond_to? :fields
-              data.add_filter("field:#{ doc_field }", @docid) if fields.include?("#{doc_field}")
-              data.add_filter("field:#{ entity_field }", "#{ entity }") if fields.include?("#{entity_field}")
-              keys = data.keys
-              data.pop_filter if fields.include?("#{entity_field}")
-              data.pop_filter if fields.include?("#{doc_field}")
-              keys
-            end
-            if keys.empty?
-              segments = produce_#{entity}
-              segments << Segment.setup("No #{entity} found in document " + @docid.to_s, -1) if segments.empty?
-              tsv = Segment.tsv(segments, *fields.reject{|f| ["#{doc_field}", "#{entity_field}", "Start", "End", "annotation_types"].include? f})
-              tsv.add_field "#{ doc_field }" do
-                @docid
-              end
-              tsv.add_field "#{ entity_field }" do
-                "#{ entity }"
-              end
-              keys = data.write_and_close do
-                data.add_filter("field:#{ doc_field }", @docid) if fields.include?("#{doc_field}")
-                data.add_filter("field:#{ entity_field }", "#{ entity }") if fields.include?("#{entity_field}")
-                keys = tsv.collect do |key, value|
-                  data[key] = value
-                  key
-                end
-                data.pop_filter if fields.include?("#{entity_field}")
-                data.pop_filter if fields.include?("#{doc_field}")
-                keys
-              end
-            else
-              return nil if raw == :check
-            end
-            return data.values if raw
-            start_pos = data.identify_field "Start"
-            data.read_and_close do
-              data.chunked_values_at(keys).collect{|annotation|
-                  begin
-                pos = annotation[start_pos]
-                Segment.load_tsv_values(text, annotation, fields) unless [-1, "-1", [-1], ["-1"]].include?(pos)
-                  rescue
-                    Log.exception $!
-                    iif keys
-                    iif [text, annotation]
-                  end
-              }.compact
-            end
-          ensure
-            data.close
-          end
-        end
-        EOC
-    end
-    def segment_index(name, persist_dir = nil)
-      @segment_indices[name] ||= Segment.index(self.send(name), persist_dir.nil? ? :memory : File.join(persist_dir, name + '.range'))
-    end
-    def load_into(segment, *annotations)
-      options = annotations.pop if Hash === annotations.last
-      options ||= {}
-      if options[:persist] and not @persist_dir.nil?
-        persist_dir = File.join(@persist_dir, 'ranges')
-      else
-        persist_dir = nil
-      end
-      Segmented.setup(segment, {})
-      annotations.collect do |name|
-        name = name.to_s
-        index = segment_index(name, persist_dir)
-        annotations = index[segment.range]
-        segment.segments[name] ||= {}
-        segment.segments[name] = annotations
-        class << segment
-          self
-        end.class_eval "def #{ name }; @segments['#{ name }']; end", __FILE__, __LINE__ + 1
-      end
-      segment
-    end
-    def entity
-      Object::Document.setup(self.docid, corpus)
-    end
-  end
-end