rbbt-text 1.2.0 → 1.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/bow/bow.rb +5 -2
  3. data/lib/rbbt/bow/dictionary.rb +27 -23
  4. data/lib/rbbt/document.rb +55 -0
  5. data/lib/rbbt/document/annotation.rb +45 -0
  6. data/lib/rbbt/document/corpus.rb +63 -0
  7. data/lib/rbbt/document/corpus/pubmed.rb +33 -0
  8. data/lib/rbbt/ner/NER.rb +3 -3
  9. data/lib/rbbt/ner/abner.rb +1 -1
  10. data/lib/rbbt/ner/banner.rb +1 -1
  11. data/lib/rbbt/ner/brat.rb +1 -1
  12. data/lib/rbbt/ner/chemical_tagger.rb +1 -2
  13. data/lib/rbbt/ner/g_norm_plus.rb +26 -3
  14. data/lib/rbbt/ner/linnaeus.rb +3 -3
  15. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +3 -3
  16. data/lib/rbbt/ner/oscar3.rb +1 -2
  17. data/lib/rbbt/ner/oscar4.rb +3 -3
  18. data/lib/rbbt/ner/patterns.rb +5 -5
  19. data/lib/rbbt/ner/regexpNER.rb +1 -2
  20. data/lib/rbbt/ner/token_trieNER.rb +35 -22
  21. data/lib/rbbt/nlp/genia/sentence_splitter.rb +3 -2
  22. data/lib/rbbt/nlp/nlp.rb +5 -5
  23. data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +37 -36
  24. data/lib/rbbt/nlp/spaCy.rb +52 -0
  25. data/lib/rbbt/segment.rb +179 -0
  26. data/lib/rbbt/segment/annotation.rb +58 -0
  27. data/lib/rbbt/segment/encoding.rb +18 -0
  28. data/lib/rbbt/{text/segment → segment}/named_entity.rb +14 -11
  29. data/lib/rbbt/segment/overlaps.rb +63 -0
  30. data/lib/rbbt/segment/range_index.rb +35 -0
  31. data/lib/rbbt/segment/relationship.rb +7 -0
  32. data/lib/rbbt/{text/segment → segment}/segmented.rb +1 -1
  33. data/lib/rbbt/segment/token.rb +23 -0
  34. data/lib/rbbt/{text/segment → segment}/transformed.rb +12 -10
  35. data/lib/rbbt/segment/tsv.rb +41 -0
  36. data/share/install/software/Linnaeus +1 -1
  37. data/share/install/software/OpenNLP +1 -1
  38. data/test/rbbt/document/corpus/test_pubmed.rb +15 -0
  39. data/test/rbbt/document/test_annotation.rb +140 -0
  40. data/test/rbbt/document/test_corpus.rb +33 -0
  41. data/test/rbbt/ner/test_finder.rb +3 -3
  42. data/test/rbbt/ner/test_g_norm_plus.rb +20 -2
  43. data/test/rbbt/ner/test_patterns.rb +9 -9
  44. data/test/rbbt/ner/test_regexpNER.rb +14 -14
  45. data/test/rbbt/ner/test_rnorm.rb +3 -4
  46. data/test/rbbt/ner/test_token_trieNER.rb +1 -0
  47. data/test/rbbt/nlp/genia/test_sentence_splitter.rb +37 -3
  48. data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +20 -4
  49. data/test/rbbt/segment/test_annotation.rb +39 -0
  50. data/test/rbbt/segment/test_corpus.rb +36 -0
  51. data/test/rbbt/segment/test_encoding.rb +24 -0
  52. data/test/rbbt/{text/segment → segment}/test_named_entity.rb +15 -11
  53. data/test/rbbt/segment/test_overlaps.rb +69 -0
  54. data/test/rbbt/segment/test_range_index.rb +42 -0
  55. data/test/rbbt/{text/segment → segment}/test_transformed.rb +105 -51
  56. data/test/rbbt/test_document.rb +14 -0
  57. data/test/rbbt/test_segment.rb +182 -0
  58. data/test/test_helper.rb +5 -3
  59. data/test/test_spaCy.rb +32 -0
  60. metadata +44 -32
  61. data/lib/rbbt/text/corpus.rb +0 -106
  62. data/lib/rbbt/text/corpus/document.rb +0 -383
  63. data/lib/rbbt/text/corpus/document_repo.rb +0 -68
  64. data/lib/rbbt/text/corpus/sources/pmid.rb +0 -34
  65. data/lib/rbbt/text/document.rb +0 -39
  66. data/lib/rbbt/text/segment.rb +0 -363
  67. data/lib/rbbt/text/segment/docid.rb +0 -46
  68. data/lib/rbbt/text/segment/relationship.rb +0 -24
  69. data/lib/rbbt/text/segment/token.rb +0 -49
  70. data/test/rbbt/text/corpus/sources/test_pmid.rb +0 -33
  71. data/test/rbbt/text/corpus/test_document.rb +0 -82
  72. data/test/rbbt/text/segment/test_relationship.rb +0 -0
  73. data/test/rbbt/text/segment/test_segmented.rb +0 -23
  74. data/test/rbbt/text/test_corpus.rb +0 -34
  75. data/test/rbbt/text/test_document.rb +0 -58
  76. data/test/rbbt/text/test_segment.rb +0 -100
@@ -0,0 +1,33 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+
5
+ class TestDocumentCorpus < Test::Unit::TestCase
6
+ def test_corpus
7
+ text = "This is a document"
8
+ Document.setup(text, "TEST", "test_doc1", nil)
9
+
10
+ corpus = Document::Corpus.setup({})
11
+
12
+ corpus.add_document(text)
13
+
14
+ docid = text.docid(corpus)
15
+
16
+ assert_equal docid.document, text
17
+ end
18
+
19
+ def test_find
20
+ text = "This is a document"
21
+ Document.setup(text, "TEST", "test_doc1", nil)
22
+
23
+ TmpFile.with_file do |path|
24
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
25
+ corpus.extend Document::Corpus
26
+
27
+ corpus.add_document(text)
28
+
29
+ assert corpus.docids("TEST:").include?(text.docid)
30
+ end
31
+ end
32
+ end
33
+
@@ -8,13 +8,13 @@ require 'rbbt/sources/NCI'
8
8
 
9
9
  class TestFinder < Test::Unit::TestCase
10
10
 
11
- def test_namespace_and_format
11
+ def _test_namespace_and_format
12
12
  f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
13
13
  assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
14
14
  assert_equal "Ensembl Gene ID", f.instances.first.format
15
15
  end
16
16
 
17
- def test_find
17
+ def _test_find
18
18
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["SF3B1"])
19
19
 
20
20
  assert_equal "ENSG00000115524", f.find("SF3B1").first
@@ -23,7 +23,7 @@ class TestFinder < Test::Unit::TestCase
23
23
  end
24
24
  end
25
25
 
26
- def test_find2
26
+ def _test_find2
27
27
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
28
28
 
29
29
  m = f.find("RAS").first
@@ -5,11 +5,29 @@ Log.severity = 0
5
5
  class TestGNormPlus < Test::Unit::TestCase
6
6
  def test_match
7
7
  text =<<-EOF
8
- We found that TP53 is regulated by MDM2 in Homo sapiens
8
+
9
+ Introduction
10
+
11
+ We found that TP53 is regulated by MDM2 in Homo
12
+ sapiens
9
13
  EOF
10
14
 
11
15
  mentions = GNormPlus.process({:file => text})
12
- Log.tsv mentions
16
+
17
+ assert_equal 1, mentions.length
18
+ assert_equal 3, mentions["file"].length
19
+ end
20
+
21
+ def test_entities
22
+ text =<<-EOF
23
+ We found that TP53 is regulated by MDM2 in Homo sapiens
24
+ EOF
25
+
26
+ mentions = GNormPlus.entities({:file => text})
27
+ assert mentions["file"].include?("TP53")
28
+ mentions["file"].each do |mention|
29
+ assert_equal mention, text[mention.range].sub("\n", ' ')
30
+ end
13
31
  end
14
32
  end
15
33
 
@@ -2,17 +2,17 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
2
2
  require 'rbbt/ner/patterns'
3
3
 
4
4
  class TestPatternRelExt < Test::Unit::TestCase
5
- def test_simple_pattern
5
+ def _test_simple_pattern
6
6
  text = "Experiments have shown that TP53 interacts with CDK5 under certain conditions"
7
7
 
8
8
  gene1 = "TP53"
9
- NamedEntity.setup(gene1, text.index(gene1), "Gene")
9
+ NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
10
10
 
11
11
  gene2 = "CDK5"
12
- NamedEntity.setup(gene2, text.index(gene2), "Gene")
12
+ NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
13
13
 
14
14
  interaction = "interacts"
15
- NamedEntity.setup(interaction, text.index(interaction), "Interaction")
15
+ NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
16
16
 
17
17
  Segmented.setup(text, [gene1, gene2, interaction])
18
18
 
@@ -23,13 +23,13 @@ class TestPatternRelExt < Test::Unit::TestCase
23
23
  text = "Experiments have shown that TP53 found in cultivated cells interacts with CDK5 under certain conditions"
24
24
 
25
25
  gene1 = "TP53"
26
- NamedEntity.setup(gene1, text.index(gene1), "Gene")
26
+ NamedEntity.setup(gene1, :offset => text.index(gene1), :entity_type => "Gene")
27
27
 
28
28
  gene2 = "CDK5"
29
- NamedEntity.setup(gene2, text.index(gene2), "Gene")
29
+ NamedEntity.setup(gene2, :offset => text.index(gene2), :entity_type => "Gene")
30
30
 
31
31
  interaction = "interacts"
32
- NamedEntity.setup(interaction, text.index(interaction), "Interaction")
32
+ NamedEntity.setup(interaction, :offset => text.index(interaction), :entity_type => "Interaction")
33
33
 
34
34
  Segmented.setup(text, {:entities => [gene1, gene2, interaction]})
35
35
 
@@ -40,7 +40,7 @@ class TestPatternRelExt < Test::Unit::TestCase
40
40
  PatternRelExt.new(["NP[entity:Gene] VP[stem:interacts] with NP[entity:Gene]"]).match_sentences([text]).first.first
41
41
  end
42
42
 
43
- def test_chunk_pattern
43
+ def _test_chunk_pattern
44
44
  text = "There is a concern with the use of thiazolidinediones in patients with an increased risk of colon cancer (e.g., familial colon polyposis)."
45
45
 
46
46
  drug = "thiazolidinediones"
@@ -57,7 +57,7 @@ class TestPatternRelExt < Test::Unit::TestCase
57
57
  end
58
58
 
59
59
 
60
- def test_entities_with_spaces
60
+ def _test_entities_with_spaces
61
61
  PatternRelExt.new("NP[entity:Gene Name]").token_trie
62
62
  end
63
63
 
@@ -23,9 +23,9 @@ class TestRegExpNER < Test::Unit::TestCase
23
23
  matches = RegExpNER.match_regexp_hash(sentence, regexp_hash)
24
24
 
25
25
  assert_equal ["this", "this", "that"].sort, matches.sort
26
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
27
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
28
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
26
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
27
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
28
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
29
29
  end
30
30
 
31
31
  def test_define_regexps
@@ -39,9 +39,9 @@ class TestRegExpNER < Test::Unit::TestCase
39
39
 
40
40
  matches = ner.entities(sentence)
41
41
  assert_equal ["this", "this", "that"].sort, matches.sort
42
- assert_equal "In ".length, matches.select{|m| m.type == :this }[0].offset
43
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this }[1].offset
44
- assert_equal :this, matches.select{|m| m.type == :this }[0].type
42
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this }[0].offset
43
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this }[1].offset
44
+ assert_equal :this, matches.select{|m| m.entity_type == :this }[0].entity_type
45
45
  end
46
46
 
47
47
 
@@ -51,9 +51,9 @@ class TestRegExpNER < Test::Unit::TestCase
51
51
  ner = RegExpNER.new({:this => /this/, :that => /that/})
52
52
  matches = ner.entities(sentence)
53
53
  assert_equal ["this", "this", "that"].sort, matches.sort
54
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
55
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
56
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
54
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
55
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
56
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
57
57
 
58
58
  Segmented.setup(sentence)
59
59
  ner_this = RegExpNER.new({:this => /this/})
@@ -64,9 +64,9 @@ class TestRegExpNER < Test::Unit::TestCase
64
64
  matches = sentence.segments
65
65
 
66
66
  assert_equal ["this", "this", "that"].sort, matches.sort
67
- assert_equal "In ".length, matches.select{|m| m.type == :this}[0].offset
68
- assert_equal "In this sentence I should find ".length, matches.select{|m| m.type == :this}[1].offset
69
- assert_equal :this, matches.select{|m| m.type == :this}[0].type
67
+ assert_equal "In ".length, matches.select{|m| m.entity_type == :this}[0].offset
68
+ assert_equal "In this sentence I should find ".length, matches.select{|m| m.entity_type == :this}[1].offset
69
+ assert_equal :this, matches.select{|m| m.entity_type == :this}[0].entity_type
70
70
  end
71
71
 
72
72
  def test_entities_captures
@@ -75,8 +75,8 @@ class TestRegExpNER < Test::Unit::TestCase
75
75
  ner = RegExpNER.new({:this => /this/, :that => /that/, :should => /I (should)/})
76
76
  matches = ner.entities(sentence)
77
77
  assert_equal ["this", "this", "that", "should"].sort, matches.sort
78
- assert_equal "In this sentence I ".length, matches.select{|m| m.type == :should}[0].offset
79
- assert_equal :should, matches.select{|m| m.type == :should}[0].type
78
+ assert_equal "In this sentence I ".length, matches.select{|m| m.entity_type == :should}[0].offset
79
+ assert_equal :should, matches.select{|m| m.entity_type == :should}[0].entity_type
80
80
  end
81
81
 
82
82
 
@@ -27,10 +27,9 @@ S000000376 AAA GENE1 DDD
27
27
  assert_equal(["S000000029"], @norm.match("FUN21"))
28
28
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
29
29
  assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
30
- assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
31
- assert_equal([], @norm.match("GER4"))
32
-
33
- @norm.match("FUN21")
30
+ assert_equal(["S000000029"].sort, @norm.match("FUN 21").sort)
31
+ assert_equal([], @norm.match("Non-sense"))
32
+ assert_equal(["S000000029", "S000000374"], @norm.match("GER4"))
34
33
  end
35
34
 
36
35
  def test_select
@@ -74,6 +74,7 @@ C2;11;22;3 3;bb
74
74
  index = TokenTrieNER.new("test", TSV.open(file, :flat, :sep => ';'))
75
75
 
76
76
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
+ assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
78
  end
78
79
  end
79
80
 
@@ -1,9 +1,43 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/genia/sentence_splitter'
3
3
 
4
- class TestClass < Test::Unit::TestCase
5
- def test_true
6
- assert true
4
+ class TestNLP < Test::Unit::TestCase
5
+ def test_sentences
6
+ text =<<-EOF
7
+ This is a sentence.
8
+ A funky character ™ in a sentence.
9
+ This is a sentence.
10
+ This is a broken
11
+ sentence. This is
12
+ another broken sentence.
13
+ EOF
14
+
15
+ iii NLP.geniass_sentence_splitter(text)
16
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
17
+ end
18
+
19
+ def test_sentences_2
20
+ text =<<-EOF
21
+ This is a sentence.
22
+ This is a sentence.
23
+ This is a broken
24
+ sentence. This is
25
+ another broken sentence.
26
+ EOF
27
+
28
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter(text)[2].strip
29
+ end
30
+
31
+ def test_sentences_ext
32
+ text =<<-EOF
33
+ This is a sentence.
34
+ This is a sentence.
35
+ This is a broken
36
+ sentence. This is
37
+ another broken sentence.
38
+ EOF
39
+
40
+ assert_equal "This is a broken\nsentence.", NLP.geniass_sentence_splitter_extension(text)[2].strip
7
41
  end
8
42
  end
9
43
 
@@ -1,6 +1,6 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/nlp/open_nlp/sentence_splitter'
3
- require 'rbbt/ner/segment'
3
+ require 'rbbt/segment'
4
4
 
5
5
  $text=<<-EOF
6
6
  Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
@@ -22,6 +22,22 @@ class TestClass < Test::Unit::TestCase
22
22
  def test_sentences
23
23
  text =<<-EOF
24
24
  This is a sentence.
25
+ No funky character in this sentence.
26
+ This is a sentence.
27
+ This is a
28
+ sentence. This is
29
+ another sentence.
30
+ EOF
31
+
32
+ assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
33
+
34
+ assert_equal 5, OpenNLP.sentence_splitter(text).length
35
+ assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
+ end
37
+
38
+ def test_sentences_fix_utf8
39
+ text =<<-EOF
40
+ This is a sentence.
25
41
  A funky character ™ in a sentence.
26
42
  This is a sentence.
27
43
  This is a
@@ -35,12 +51,12 @@ another sentence.
35
51
  assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
36
52
  end
37
53
 
38
- def _test_text_sentences
54
+ def test_text_sentences
39
55
  Misc.benchmark(100) do
40
- OpenNLP.sentence_splitter($text).include? "Our
56
+ assert OpenNLP.sentence_splitter($text).include?("Our
41
57
  findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
42
58
  AT/RT and the usefulness of antibodies directed against SMARCA4 in this
43
- diagnostic setting."
59
+ diagnostic setting.")
44
60
  end
45
61
  end
46
62
  end
@@ -0,0 +1,39 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/annotation'
6
+
7
+ class TestAnnotation < Test::Unit::TestCase
8
+ def test_annotation
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
13
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
14
+
15
+ assert_equal 'verb', annotation.annotid.split(":")[5]
16
+
17
+ annotation = SegmentAnnotation.setup(segment.segid, :type => :verb)
18
+ assert_equal 'verb', annotation.annotid.split(":")[5]
19
+ end
20
+
21
+ def test_annotid
22
+ text = "This is a document"
23
+ Document.setup(text, "TEST", "test_doc1", nil)
24
+
25
+ corpus = Document::Corpus.setup({})
26
+
27
+ corpus.add_document(text)
28
+
29
+ segment = Segment.setup("is", :offset => text.index("is"), :docid => text.docid)
30
+ annotation = SegmentAnnotation.setup(segment, :type => :verb)
31
+
32
+ annotid = annotation.annotid(corpus)
33
+
34
+ assert_equal 'verb', annotid.type
35
+ assert_equal 'verb', annotid.annotation.type
36
+ assert_equal 'is', annotid.annotation
37
+ end
38
+ end
39
+
@@ -0,0 +1,36 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/document'
3
+ require 'rbbt/document/corpus'
4
+ require 'rbbt/segment'
5
+ require 'rbbt/segment/corpus'
6
+
7
+ class TestSegmentCorpus < Test::Unit::TestCase
8
+ def test_corpus
9
+ text = "This is a document"
10
+ Document.setup(text, "TEST", "test_doc1", nil)
11
+
12
+ corpus = {}
13
+ corpus.extend Document::Corpus
14
+
15
+ corpus.add_document(text)
16
+
17
+ docid = text.docid(corpus)
18
+
19
+ assert_equal docid.document, text
20
+ end
21
+
22
+ def test_find
23
+ text = "This is a document"
24
+ Document.setup(text, "TEST", "test_doc1", nil)
25
+
26
+ TmpFile.with_file do |path|
27
+ corpus = Persist.open_tokyocabinet(path, true, :single, "BDB")
28
+ corpus.extend Document::Corpus
29
+
30
+ corpus.add_document(text)
31
+
32
+ assert corpus.prefix("TEST:").include?(text.docid)
33
+ end
34
+ end
35
+ end
36
+
@@ -0,0 +1,24 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment/encoding'
3
+
4
+ class TestEncoding < Test::Unit::TestCase
5
+ def test_bad_chars
6
+ text = "A funky character ™ in a sentence."
7
+
8
+ assert_equal ["™"], Segment.bad_chars(text)
9
+ end
10
+
11
+ def test_ascii
12
+ text = "A funky character ™ in a sentence."
13
+
14
+ Segment.ascii(text) do
15
+ assert_equal "A funky character ? in a sentence.", text
16
+ end
17
+
18
+ Segment.ascii(text, "NONASCII") do
19
+ assert_equal "A funky character NONASCII in a sentence.", text
20
+ end
21
+
22
+ assert_equal "A funky character ™ in a sentence.", text
23
+ end
24
+ end
@@ -1,6 +1,6 @@
1
- require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
- require 'rbbt/text/segment'
3
- require 'rbbt/text/segment/named_entity'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/segment'
3
+ require 'rbbt/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_info
@@ -15,35 +15,39 @@ class TestClass < Test::Unit::TestCase
15
15
 
16
16
  def test_all_args
17
17
  a = "test"
18
- NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
18
+ NamedEntity.setup a, 10, "TEST:doc1:test_type:hash", "NamedEntity", "TYPE", "CODE", "SCORE"
19
19
  assert_equal 10, a.offset
20
+ assert_equal "NamedEntity", a.type
21
+ assert_equal "TYPE", a.entity_type
22
+ assert_equal "SCORE", a.score
20
23
  end
21
24
 
22
25
  def test_tsv
23
26
  a = "test"
24
27
  NamedEntity.setup a, 10, "TYPE", "CODE", "SCORE"
25
- assert Segment.tsv([a]).fields.include? "code"
26
- assert Segment.tsv([a], nil).fields.include? "code"
27
- assert Segment.tsv([a], "literal").fields.include? "code"
28
+ assert Annotated.tsv([a]).fields.include? "code"
29
+ assert Annotated.tsv([a], nil).fields.include? "code"
30
+ assert Annotated.tsv([a], :all).fields.include? "code"
31
+ assert Annotated.tsv([a], :all).fields.include? "literal"
28
32
  end
29
33
 
30
- def test_segment_brat
34
+ def __test_segment_brat
31
35
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
32
36
 
33
37
  gene1 = "TP53"
34
38
  gene1.extend NamedEntity
35
39
  gene1.offset = a.index gene1
36
- gene1.type = "Gene"
40
+ gene1.entity_type = "Gene"
37
41
 
38
42
  gene2 = "CDK5R1"
39
43
  gene2.extend NamedEntity
40
44
  gene2.offset = a.index gene2
41
- gene2.type = "Gene"
45
+ gene2.entity_type = "Gene"
42
46
 
43
47
  gene3 = "TP53 gene"
44
48
  gene3.extend NamedEntity
45
49
  gene3.offset = a.index gene3
46
- gene3.type = "Gene"
50
+ gene3.entity_type = "Gene"
47
51
 
48
52
  segments = [gene1, gene2, gene3]
49
53
  assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"