rbbt-text 1.2.0 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/bow/bow.rb +5 -2
  3. data/lib/rbbt/bow/dictionary.rb +27 -23
  4. data/lib/rbbt/document.rb +55 -0
  5. data/lib/rbbt/document/annotation.rb +45 -0
  6. data/lib/rbbt/document/corpus.rb +63 -0
  7. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  8. data/lib/rbbt/ner/NER.rb +3 -3
  9. data/lib/rbbt/ner/abner.rb +1 -1
  10. data/lib/rbbt/ner/banner.rb +1 -1
  11. data/lib/rbbt/ner/brat.rb +1 -1
  12. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  13. data/lib/rbbt/ner/g_norm_plus.rb +26 -3
  14. data/lib/rbbt/ner/linnaeus.rb +3 -3
  15. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  16. data/lib/rbbt/ner/oscar3.rb +1 -2
  17. data/lib/rbbt/ner/oscar4.rb +3 -3
  18. data/lib/rbbt/ner/patterns.rb +5 -5
  19. data/lib/rbbt/ner/regexpNER.rb +1 -2
  20. data/lib/rbbt/ner/token_trieNER.rb +35 -22
  21. data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
  22. data/lib/rbbt/nlp/nlp.rb +5 -5
  23. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  24. data/lib/rbbt/nlp/spaCy.rb +52 -0
  25. data/lib/rbbt/segment.rb +179 -0
  26. data/lib/rbbt/segment/annotation.rb +58 -0
  27. data/lib/rbbt/segment/encoding.rb +18 -0
  28. data/lib/rbbt/{text/segment → segment}/named_entity.rb +14 -11
  29. data/lib/rbbt/segment/overlaps.rb +63 -0
  30. data/lib/rbbt/segment/range_index.rb +35 -0
  31. data/lib/rbbt/segment/relationship.rb +7 -0
  32. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  33. data/lib/rbbt/segment/token.rb +23 -0
  34. data/lib/rbbt/{text/segment → segment}/transformed.rb +12 -10
  35. data/lib/rbbt/segment/tsv.rb +41 -0
  36. data/share/install/software/Linnaeus +1 -1
  37. data/share/install/software/OpenNLP +1 -1
  38. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  39. data/test/rbbt/document/test_annotation.rb +140 -0
  40. data/test/rbbt/document/test_corpus.rb +33 -0
  41. data/test/rbbt/ner/test_finder.rb +3 -3
  42. data/test/rbbt/ner/test_g_norm_plus.rb +20 -2
  43. data/test/rbbt/ner/test_patterns.rb +9 -9
  44. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  45. data/test/rbbt/ner/test_rnorm.rb +3 -4
  46. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  47. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
  48. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  49. data/test/rbbt/segment/test_annotation.rb +39 -0
  50. data/test/rbbt/segment/test_corpus.rb +36 -0
  51. data/test/rbbt/segment/test_encoding.rb +24 -0
  52. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
  53. data/test/rbbt/segment/test_overlaps.rb +69 -0
  54. data/test/rbbt/segment/test_range_index.rb +42 -0
  55. data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
  56. data/test/rbbt/test_document.rb +14 -0
  57. data/test/rbbt/test_segment.rb +182 -0
  58. data/test/test_helper.rb +5 -3
  59. data/test/test_spaCy.rb +32 -0
  60. metadata +44 -32
  61. data/lib/rbbt/text/corpus.rb +0 -106
  62. data/lib/rbbt/text/corpus/document.rb +0 -383
  63. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  64. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  65. data/lib/rbbt/text/document.rb +0 -39
  66. data/lib/rbbt/text/segment.rb +0 -363
  67. data/lib/rbbt/text/segment/docid.rb +0 -46
  68. data/lib/rbbt/text/segment/relationship.rb +0 -24
  69. data/lib/rbbt/text/segment/token.rb +0 -49
  70. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  71. data/test/rbbt/text/corpus/test_document.rb +0 -82
  72. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  73. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  74. data/test/rbbt/text/test_corpus.rb +0 -34
  75. data/test/rbbt/text/test_document.rb +0 -58
  76. data/test/rbbt/text/test_segment.rb +0 -100
@@ -0,0 +1,33 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+
5
+ class TestDocumentCorpus < Test::Unit::TestCase
6
+ def test_corpus
7
+ text = "This is a document"
8
+ Document.setup(text, "TEST", "test_doc1", nil)
9
+
10
+ corpus = Document::Corpus.setup({})
11
+
12
+ corpus.add_document(text)
13
+
14
+ docid = text.docid(corpus)
15
+
16
+ assert_equal docid.document, text
17
+ end
18
+
19
+ def test_find
20
+ text = "This is a document"
21
+ Document.setup(text, "TEST", "test_doc1", nil)
22
+
23
+ TmpFile.with_file do |path|
24
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
25
+ corpus.extend Document::Corpus
26
+
27
+ corpus.add_document(text)
28
+
29
+ assert corpus.docids("TEST:").include?(text.docid)
30
+ end
31
+ end
32
+ end
33
+
@@ -8,13 +8,13 @@ require 'rbbt/sources/NCI'
8
8
 
9
9
  class TestFinder < Test::Unit::TestCase
10
10
 
11
- def test_namespace_and_format
11
+ def _test_namespace_and_format
12
12
  f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
13
13
  assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
14
14
  assert_equal "Ensembl Gene ID", f.instances.first.format
15
15
  end
16
16
 
17
- def test_find
17
+ def _test_find
18
18
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["SF3B1"])
19
19
 
20
20
  assert_equal "ENSG00000115524", f.find("SF3B1").first
@@ -23,7 +23,7 @@ class TestFinder < Test::Unit::TestCase
23
23
  end
24
24
  end
25
25
 
26
- def test_find2
26
+ def _test_find2
27
27
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
28
28
 
29
29
  m = f.find("RAS").first
@@ -5,11 +5,29 @@ Log.severity = 0
5
5
  class TestGNormPlus < Test::Unit::TestCase
6
6
  def test_match
7
7
  text =<<-EOF
8
- We found that TP53 is regulated by MDM2 in Homo sapiens
8
+
9
+ Introduction
10
+
11
+ We found that TP53 is regulated by MDM2 in Homo
12
+ sapiens
9
13
  EOF
10
14
 
11
15
  mentions = GNormPlus.process({:file => text})
12
- Log.tsv mentions
16
+
17
+ assert_equal 1, mentions.length
18
+ assert_equal 3, mentions["file"].length
19
+ end
20
+
21
+ def test_entities
22
+ text =<<-EOF
23
+ We found that TP53 is regulated by MDM2 in Homo sapiens
24
+ EOF
25
+
26
+ mentions = GNormPlus.entities({:file => text})
27
+ assert mentions["file"].include?("TP53")
28
+ mentions["file"].each do |mention|
29
+ assert_equal mention, text[mention.range].sub("\n", ' ')
30
+ end
13
31
  end
14
32
  end
15
33
 
@@ -2,17 +2,17 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
2
2
  require 'rbbt/ner/patterns'
3
3
 
4
4
  class TestPatternRelExt < Test::Unit::TestCase
5
- def test_simple_pattern
5
+ def _test_simple_pattern
6
6
  text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
7
7
 
8
8
  gene1 = "TP53"
9
- NamedEntity.setup(gene1, text.index(gene1), "Gene")
9
+ NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
10
10
 
11
11
  gene2 = "CDK5"
12
- NamedEntity.setup(gene2, text.index(gene2), "Gene")
12
+ NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
13
13
 
14
14
  interaction = "interacts"
15
- NamedEntity.setup(interaction, text.index(interaction), "Interaction")
15
+ NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
16
16
 
17
17
  Segmented.setup(text, [gene1, gene2, interaction])
18
18
 
@@ -23,13 +23,13 @@ class TestPatternRelExt < Test::Unit::TestCase
23
23
  text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
24
24
 
25
25
  gene1 = "TP53"
26
- NamedEntity.setup(gene1, text.index(gene1), "Gene")
26
+ NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
27
27
 
28
28
  gene2 = "CDK5"
29
- NamedEntity.setup(gene2, text.index(gene2), "Gene")
29
+ NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
30
30
 
31
31
  interaction = "interacts"
32
- NamedEntity.setup(interaction, text.index(interaction), "Interaction")
32
+ NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
33
33
 
34
34
  Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
35
35
 
@@ -40,7 +40,7 @@ class TestPatternRelExt < Test::Unit::TestCase
40
40
  PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
41
41
  end
42
42
 
43
- def test_chunk_pattern
43
+ def _test_chunk_pattern
44
44
  text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
45
45
 
46
46
  drug = "thiazolidinediones"
@@ -57,7 +57,7 @@ class TestPatternRelExt < Test::Unit::TestCase
57
57
  end
58
58
 
59
59
 
60
- def test_entities_with_spaces
60
+ def _test_entities_with_spaces
61
61
  PatternRelExt.new("NP[entity:Gene Name]").token_trie
62
62
  end
63
63
 
@@ -23,9 +23,9 @@ class TestRegExpNER < Test::Unit::TestCase
23
23
  matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)
24
24
 
25
25
  assert_equal ["this", "this", "that"].sort, matches.sort
26
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
27
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
28
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
26
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
27
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
28
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
29
29
  end
30
30
 
31
31
  def test_define_regexps
@@ -39,9 +39,9 @@ class TestRegExpNER < Test::Unit::TestCase
39
39
 
40
40
  matches = ner.entities(sentence)
41
41
  assert_equal ["this", "this", "that"].sort, matches.sort
42
- assert_equal "In ".length, matches.select{|m| m.type == :this }[0].offset
43
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this }[1].offset
44
- assert_equal :this, matches.select{|m| m.type == :this }[0].type
42
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this }[0].offset
43
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this }[1].offset
44
+ assert_equal :this, matches.select{|m| m.entity_type == :this }[0].entity_type
45
45
  end
46
46
 
47
47
 
@@ -51,9 +51,9 @@ class TestRegExpNER < Test::Unit::TestCase
51
51
  ner = RegExpNER.new({:this => /this/, :that => /that/})
52
52
  matches = ner.entities(sentence)
53
53
  assert_equal ["this", "this", "that"].sort, matches.sort
54
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
55
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
56
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
54
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
55
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
56
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
57
57
 
58
58
  Segmented.setup(sentence)
59
59
  ner_this = RegExpNER.new({:this => /this/})
@@ -64,9 +64,9 @@ class TestRegExpNER < Test::Unit::TestCase
64
64
  matches = sentence.segments
65
65
 
66
66
  assert_equal ["this", "this", "that"].sort, matches.sort
67
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
68
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
69
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
67
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
68
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
69
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
70
70
  end
71
71
 
72
72
  def test_entities_captures
@@ -75,8 +75,8 @@ class TestRegExpNER < Test::Unit::TestCase
75
75
  ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
76
76
  matches = ner.entities(sentence)
77
77
  assert_equal ["this", "this", "that", "should"].sort, matches.sort
78
- assert_equal "In this sentence I ".length, matches.select{|m| m.type == :should}[0].offset
79
- assert_equal :should, matches.select{|m| m.type == :should}[0].type
78
+ assert_equal "In this sentence I ".length, matches.select{|m| m.entity_type == :should}[0].offset
79
+ assert_equal :should, matches.select{|m| m.entity_type == :should}[0].entity_type
80
80
  end
81
81
 
82
82
 
@@ -27,10 +27,9 @@ S000000376 AAA GENE1 DDD
27
27
  assert_equal(["S000000029"], @norm.match("FUN21"))
28
28
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
29
29
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
30
- assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
31
- assert_equal([], @norm.match("GER4"))
32
-
33
- @norm.match("FUN21")
30
+ assert_equal(["S000000029"].sort, @norm.match("FUN 21").sort)
31
+ assert_equal([], @norm.match("Non-sense"))
32
+ assert_equal(["S000000029", "S000000374"], @norm.match("GER4"))
34
33
  end
35
34
 
36
35
  def test_select
@@ -74,6 +74,7 @@ C2;11;22;3 3;bb
74
74
  index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
75
75
 
76
76
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
+ assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
78
  end
78
79
  end
79
80
 
@@ -1,9 +1,43 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/genia/sentence_splitter'
3
3
 
4
- class TestClass < Test::Unit::TestCase
5
- def test_true
6
- assert true
4
+ class TestNLP < Test::Unit::TestCase
5
+ def test_sentences
6
+ text =<<-EOF
7
+ This is a sentence.
8
+ A funky character ™ in a sentence.
9
+ This is a sentence.
10
+ This is a broken
11
+ sentence. This is
12
+ another broken sentence.
13
+ EOF
14
+
15
+ iii NLP.geniass_sentence_splitter(text)
16
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
17
+ end
18
+
19
+ def test_sentences_2
20
+ text =<<-EOF
21
+ This is a sentence.
22
+ This is a sentence.
23
+ This is a broken
24
+ sentence. This is
25
+ another broken sentence.
26
+ EOF
27
+
28
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
29
+ end
30
+
31
+ def test_sentences_ext
32
+ text =<<-EOF
33
+ This is a sentence.
34
+ This is a sentence.
35
+ This is a broken
36
+ sentence. This is
37
+ another broken sentence.
38
+ EOF
39
+
40
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter_extension(text)[2].strip
7
41
  end
8
42
  end
9
43
 
@@ -1,6 +1,6 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/open_nlp/sentence_splitter'
3
- require 'rbbt/ner/segment'
3
+ require 'rbbt/segment'
4
4
 
5
5
  $text=<<-EOF
6
6
  Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
@@ -22,6 +22,22 @@ class TestClass < Test::Unit::TestCase
22
22
  def test_sentences
23
23
  text =<<-EOF
24
24
  This is a sentence.
25
+ No funky character in this sentence.
26
+ This is a sentence.
27
+ This is a
28
+ sentence. This is
29
+ another sentence.
30
+ EOF
31
+
32
+ assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
33
+
34
+ assert_equal 5, OpenNLP.sentence_splitter(text).length
35
+ assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
+ end
37
+
38
+ def test_sentences_fix_utf8
39
+ text =<<-EOF
40
+ This is a sentence.
25
41
  A funky character ™ in a sentence.
26
42
  This is a sentence.
27
43
  This is a
@@ -35,12 +51,12 @@ another sentence.
35
51
  assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
52
  end
37
53
 
38
- def _test_text_sentences
54
+ def test_text_sentences
39
55
  Misc.benchmark(100) do
40
- OpenNLP.sentence_splitter($text).include? "Our
56
+ assert OpenNLP.sentence_splitter($text).include?("Our
41
57
  findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
42
58
  AT/RT and the usefulness of antibodies directed against SMARCA4 in this
43
- diagnostic setting."
59
+ diagnostic setting.")
44
60
  end
45
61
  end
46
62
  end
@@ -0,0 +1,39 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/annotation'
6
+
7
+ class TestAnnotation < Test::Unit::TestCase
8
+ def test_annotation
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
13
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
14
+
15
+ assert_equal 'verb', annotation.annotid.split(":")[5]
16
+
17
+ annotation = SegmentAnnotation.setup(segment.segid, :type => :verb)
18
+ assert_equal 'verb', annotation.annotid.split(":")[5]
19
+ end
20
+
21
+ def test_annotid
22
+ text = "This is a document"
23
+ Document.setup(text, "TEST", "test_doc1", nil)
24
+
25
+ corpus = Document::Corpus.setup({})
26
+
27
+ corpus.add_document(text)
28
+
29
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
30
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
31
+
32
+ annotid = annotation.annotid(corpus)
33
+
34
+ assert_equal 'verb', annotid.type
35
+ assert_equal 'verb', annotid.annotation.type
36
+ assert_equal 'is', annotid.annotation
37
+ end
38
+ end
39
+
@@ -0,0 +1,36 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/corpus'
6
+
7
+ class TestSegmentCorpus < Test::Unit::TestCase
8
+ def test_corpus
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = {}
13
+ corpus.extend Document::Corpus
14
+
15
+ corpus.add_document(text)
16
+
17
+ docid = text.docid(corpus)
18
+
19
+ assert_equal docid.document, text
20
+ end
21
+
22
+ def test_find
23
+ text = "This is a document"
24
+ Document.setup(text, "TEST", "test_doc1", nil)
25
+
26
+ TmpFile.with_file do |path|
27
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
28
+ corpus.extend Document::Corpus
29
+
30
+ corpus.add_document(text)
31
+
32
+ assert corpus.prefix("TEST:").include?(text.docid)
33
+ end
34
+ end
35
+ end
36
+
@@ -0,0 +1,24 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/encoding'
3
+
4
+ class TestEncoding < Test::Unit::TestCase
5
+ def test_bad_chars
6
+ text = "A funky character ™ in a sentence."
7
+
8
+ assert_equal ["™"], Segment.bad_chars(text)
9
+ end
10
+
11
+ def test_ascii
12
+ text = "A funky character ™ in a sentence."
13
+
14
+ Segment.ascii(text) do
15
+ assert_equal "A funky character ? in a sentence.", text
16
+ end
17
+
18
+ Segment.ascii(text, "NONASCII") do
19
+ assert_equal "A funky character NONASCII in a sentence.", text
20
+ end
21
+
22
+ assert_equal "A funky character ™ in a sentence.", text
23
+ end
24
+ end
@@ -1,6 +1,6 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_info
@@ -15,35 +15,39 @@ class TestClass < Test::Unit::TestCase
15
15
 
16
16
  def test_all_args
17
17
  a = "test"
18
- NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
18
+ NamedEntity.setup a, 10, "TEST:doc1:test_type:hash", "NamedEntity", "TYPE", "CODE", "SCORE"
19
19
  assert_equal 10, a.offset
20
+ assert_equal "NamedEntity", a.type
21
+ assert_equal "TYPE", a.entity_type
22
+ assert_equal "SCORE", a.score
20
23
  end
21
24
 
22
25
  def test_tsv
23
26
  a = "test"
24
27
  NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
25
- assert Segment.tsv([a]).fields.include? "code"
26
- assert Segment.tsv([a], nil).fields.include? "code"
27
- assert Segment.tsv([a], "literal").fields.include? "code"
28
+ assert Annotated.tsv([a]).fields.include? "code"
29
+ assert Annotated.tsv([a], nil).fields.include? "code"
30
+ assert Annotated.tsv([a], :all).fields.include? "code"
31
+ assert Annotated.tsv([a], :all).fields.include? "literal"
28
32
  end
29
33
 
30
- def test_segment_brat
34
+ def __test_segment_brat
31
35
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
32
36
 
33
37
  gene1 = "TP53"
34
38
  gene1.extend NamedEntity
35
39
  gene1.offset = a.index gene1
36
- gene1.type = "Gene"
40
+ gene1.entity_type = "Gene"
37
41
 
38
42
  gene2 = "CDK5R1"
39
43
  gene2.extend NamedEntity
40
44
  gene2.offset = a.index gene2
41
- gene2.type = "Gene"
45
+ gene2.entity_type = "Gene"
42
46
 
43
47
  gene3 = "TP53 gene"
44
48
  gene3.extend NamedEntity
45
49
  gene3.offset = a.index gene3
46
- gene3.type = "Gene"
50
+ gene3.entity_type = "Gene"
47
51
 
48
52
  segments = [gene1, gene2, gene3]
49
53
  assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"