rbbt-text 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/bin/get_ppis.rb +52 -0
  2. data/lib/rbbt/bow/dictionary.rb +9 -9
  3. data/lib/rbbt/bow/misc.rb +86 -2
  4. data/lib/rbbt/corpus/corpus.rb +55 -0
  5. data/lib/rbbt/corpus/document.rb +289 -0
  6. data/lib/rbbt/corpus/document_repo.rb +115 -0
  7. data/lib/rbbt/corpus/sources/pubmed.rb +26 -0
  8. data/lib/rbbt/ner/NER.rb +7 -5
  9. data/lib/rbbt/ner/abner.rb +13 -2
  10. data/lib/rbbt/ner/annotations.rb +182 -51
  11. data/lib/rbbt/ner/annotations/annotated.rb +15 -0
  12. data/lib/rbbt/ner/annotations/named_entity.rb +37 -0
  13. data/lib/rbbt/ner/annotations/relations.rb +25 -0
  14. data/lib/rbbt/ner/annotations/token.rb +28 -0
  15. data/lib/rbbt/ner/annotations/transformed.rb +170 -0
  16. data/lib/rbbt/ner/banner.rb +8 -5
  17. data/lib/rbbt/ner/chemical_tagger.rb +34 -0
  18. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +136 -0
  19. data/lib/rbbt/ner/oscar3.rb +1 -1
  20. data/lib/rbbt/ner/oscar4.rb +41 -0
  21. data/lib/rbbt/ner/patterns.rb +132 -0
  22. data/lib/rbbt/ner/rnorm.rb +141 -0
  23. data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
  24. data/lib/rbbt/ner/rnorm/tokens.rb +218 -0
  25. data/lib/rbbt/ner/token_trieNER.rb +185 -51
  26. data/lib/rbbt/nlp/genia/sentence_splitter.rb +214 -0
  27. data/lib/rbbt/nlp/nlp.rb +235 -0
  28. data/share/install/software/ABNER +0 -4
  29. data/share/install/software/ChemicalTagger +81 -0
  30. data/share/install/software/Gdep +115 -0
  31. data/share/install/software/Geniass +118 -0
  32. data/share/install/software/OSCAR4 +16 -0
  33. data/share/install/software/StanfordParser +15 -0
  34. data/share/patterns/drug_induce_disease +22 -0
  35. data/share/rnorm/cue_default +10 -0
  36. data/share/rnorm/tokens_default +86 -0
  37. data/share/{stopwords → wordlists/stopwords} +0 -0
  38. data/test/rbbt/bow/test_bow.rb +1 -1
  39. data/test/rbbt/bow/test_dictionary.rb +1 -1
  40. data/test/rbbt/bow/test_misc.rb +1 -1
  41. data/test/rbbt/corpus/test_corpus.rb +99 -0
  42. data/test/rbbt/corpus/test_document.rb +222 -0
  43. data/test/rbbt/ner/annotations/test_named_entity.rb +14 -0
  44. data/test/rbbt/ner/annotations/test_transformed.rb +175 -0
  45. data/test/rbbt/ner/test_abner.rb +1 -1
  46. data/test/rbbt/ner/test_annotations.rb +64 -2
  47. data/test/rbbt/ner/test_banner.rb +1 -1
  48. data/test/rbbt/ner/test_chemical_tagger.rb +56 -0
  49. data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +20 -0
  50. data/test/rbbt/ner/{test_oscar3.rb → test_oscar4.rb} +12 -13
  51. data/test/rbbt/ner/test_patterns.rb +66 -0
  52. data/test/rbbt/ner/test_regexpNER.rb +1 -1
  53. data/test/rbbt/ner/test_rnorm.rb +47 -0
  54. data/test/rbbt/ner/test_token_trieNER.rb +60 -35
  55. data/test/rbbt/nlp/test_nlp.rb +88 -0
  56. data/test/test_helper.rb +20 -0
  57. metadata +93 -20
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
2
  require 'rbbt/ner/regexpNER'
3
3
 
4
4
  class TestRegExpNER < Test::Unit::TestCase
@@ -0,0 +1,47 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/ner/rnorm'
3
+ require 'rbbt/util/open'
4
+ require 'rbbt/util/tmpfile'
5
+ require 'test/unit'
6
+
7
+ class TestRNORM < Test::Unit::TestCase
8
+
9
+ def setup
10
+ tmp = TmpFile.tmp_file("test-rnorm-")
11
+ lexicon =<<-EOT
12
+ S000000029 YAL031C GIP4 FUN21
13
+ S000000030 YAL032C PRP45 FUN20
14
+ S000000031 YAL033W POP5 FUN53
15
+ S000000374 YBR170C NPL4 HRD4
16
+ S000000375 GENE1 BBB CCC
17
+ S000000376 AAA GENE1 DDD
18
+ EOT
19
+
20
+ Open.write(tmp, lexicon)
21
+
22
+ @norm = Normalizer.new(tmp)
23
+ FileUtils.rm tmp
24
+ end
25
+
26
+ def test_match
27
+ assert_equal(["S000000029"], @norm.match("FUN21"))
28
+ assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
29
+ assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
30
+ assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
31
+ assert_equal([], @norm.match("GER4"))
32
+
33
+ @norm.match("FUN21")
34
+ end
35
+
36
+ def test_select
37
+ assert_equal(["S000000029"], @norm.select(["S000000030", "S000000029", "S000000031"],"FUN 21"))
38
+ end
39
+
40
+ def test_resolve
41
+ assert_equal(["S000000029"], @norm.resolve("FUN 21"))
42
+ end
43
+
44
+ def test_order
45
+ assert_equal(["S000000375"], @norm.resolve("GENE1"))
46
+ end
47
+ end
@@ -10,16 +10,16 @@ class TestTokenTrieNER < Test::Unit::TestCase
10
10
  assert_equal 10, TokenTrieNER.tokenize('123456789 12345').last.offset
11
11
  assert_equal 0, TokenTrieNER.tokenize('123456789 12345').first.offset
12
12
 
13
-
14
13
  text = '123456789 12345'
15
14
  assert_equal '12345', text[TokenTrieNER.tokenize('123456789 12345').last.range]
16
15
  end
17
16
 
18
17
  def test_merge
19
18
  tokens = %w(a b c)
20
- index = {'a' => {'b' => {'c' => {:END => [TokenTrieNER::Code.new 'CODE']}}}}
19
+ tokens.extend TokenTrieNER::EnumeratedArray
20
+ index = {'a' => {'b' => {'c' => {:END => [TokenTrieNER::Code.new('CODE')]}}}}
21
21
 
22
- assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.value
22
+ assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.code
23
23
  end
24
24
 
25
25
  def test_process
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
30
30
 
31
31
  TmpFile.with_file(lexicon) do |file|
32
32
 
33
- index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))
33
+ index = TokenTrieNER.process({}, TSV.new(file, :flat, :sep => ';'))
34
34
 
35
35
  assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
36
36
  assert_equal [:END], index['aa'].keys
@@ -47,20 +47,20 @@ C2;11;22;3 3;bb
47
47
 
48
48
 
49
49
  TmpFile.with_file(lexicon) do |file|
50
- index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))
50
+ index = TokenTrieNER.process({}, TSV.new(file, :sep => ';', :type => :flat ))
51
51
 
52
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).first.collect{|c| c.value}.include? 'C1'
53
- assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).last
52
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
53
+ assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
54
54
 
55
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), true).first.collect{|c| c.value}.include? 'C1'
55
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
56
56
 
57
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).first.collect{|c| c.value}.include? 'C1'
58
- assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).last
57
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
58
+ assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).last
59
59
 
60
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).first.collect{|c| c.value}.include? 'C2'
61
- assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).last
60
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
61
+ assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).last
62
62
 
63
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf'), false).first.collect{|c| c.value}.include? 'C2'
63
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
64
64
  end
65
65
  end
66
66
 
@@ -71,42 +71,67 @@ C2;11;22;3 3;bb
71
71
  EOF
72
72
 
73
73
  TmpFile.with_file(lexicon) do |file|
74
- index = TokenTrieNER.new(file, nil, :sep => ';')
74
+ index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'))
75
75
 
76
+ index.match(' asdfa dsf asdf aa asdfasdf ')
76
77
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
78
  end
78
79
  end
79
80
 
80
- def _test_polysearch_long_match
81
- begin
82
- require 'rbbt/sources/polysearch'
83
- rescue
84
- puts "Polysearch is not available. Some test have not ran."
85
- assert true
86
- return
81
+ def test_slack
82
+ lexicon =<<-EOF
83
+ C1;aa;AA;bb cc cc b
84
+ C2;11;22;3 3;bb
85
+ EOF
86
+
87
+ TmpFile.with_file(lexicon) do |file|
88
+ index = TokenTrieNER.new({})
89
+ index.slack = Proc.new{|t| t =~ /^c*$/}
90
+
91
+ index.merge TSV.new(file, :flat, :sep => ';')
92
+
93
+ assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
94
+ assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
95
+ assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
87
96
  end
97
+ end
88
98
 
89
- sentence = "mammary and pituitary neoplasms as well as other drug-related mammary/reproductive tissue alterations in females were considered"
99
+ def test_own_tokens
100
+ lexicon =<<-EOF
101
+ C1;aa;AA;bb cc cc b
102
+ C2;11;22;3 3;bb
103
+ EOF
90
104
 
91
- index = TokenTrieNER.new Rbbt.find_datafile('organ')
92
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'OR00063'
105
+ TmpFile.with_file(lexicon) do |file|
106
+ index = TokenTrieNER.new({})
107
+ index.slack = Proc.new{|t| t =~ /^c*$/}
93
108
 
94
- index = TokenTrieNER.new Rbbt.find_datafile('disease')
95
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
109
+ index.merge TSV.new(file, :flat, :sep => ';')
96
110
 
97
- index = TokenTrieNER.new Rbbt.find_datafile('disease'), Rbbt.find_datafile('organ')
98
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
111
+ assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
112
+ end
113
+ end
99
114
 
100
- index = TokenTrieNER.new Rbbt.find_datafile('disease'), Rbbt.find_datafile('organ')
101
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
115
+ def test_proc_index
116
+ index = TokenTrieNER.new({})
117
+ index.merge({ "aa" => {:PROCS => {Proc.new{|c| c == 'c'} => {:END => [TokenTrieNER::Code.new(:entity, :C1)]}}}})
102
118
 
103
- index = TokenTrieNER.new Rbbt.find_datafile('organ')
104
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'OR00063'
105
- index.merge Rbbt.find_datafile('disease')
106
- assert ! index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')
107
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
119
+ assert index.match(Token.tokenize('3 cc 3 aa c ddd')).select{|m| m.code.include? :entity}.any?
108
120
  end
109
121
 
122
+ def test_persistence
123
+ lexicon =<<-EOF
124
+ C1;aa;AA;bb b
125
+ C2;11;22;3 3;bb
126
+ EOF
127
+
128
+ TmpFile.with_file(lexicon) do |file|
129
+ index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'), :persistence => true)
130
+
131
+ index.match(' asdfa dsf asdf aa asdfasdf ')
132
+ assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
133
+ end
134
+ end
110
135
 
111
136
  end
112
137
 
@@ -0,0 +1,88 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/nlp/nlp'
3
+
4
+ text=<<-EOF
5
+ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
6
+ of early childhood poorly responding to therapy. The majority of cases show
7
+ inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
8
+ triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
9
+ report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
10
+ retained SMARCB1 staining on immunohistochemistry and lacked genetic
11
+ alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
12
+ another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
13
+ (BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
14
+ findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
15
+ AT/RT and the usefulness of antibodies directed against SMARCA4 in this
16
+ diagnostic setting.
17
+ EOF
18
+
19
+ class TestClass < Test::Unit::TestCase
20
+
21
+ def test_sentences
22
+ text =<<-EOF
23
+ This is a
24
+ sentence. This is
25
+ another sentence.
26
+ EOF
27
+
28
+ assert_equal 2, NLP.geniass_sentence_splitter(text).length
29
+ assert_equal "This is a \nsentence. ", NLP.geniass_sentence_splitter(text).first
30
+ end
31
+
32
+ def test_gdep_parse_sentences
33
+ text =<<-EOF
34
+ Atypical teratoid/rhabdoid tumors (AT/RTs)
35
+ are highly aggressive brain
36
+ tumors of early childhood poorly
37
+ responding to therapy.
38
+ EOF
39
+
40
+ NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
41
+ segment_list.each do |segment|
42
+ assert_equal sentence[segment.range], segment
43
+ end
44
+ end
45
+ end
46
+
47
+ def test_gdep_chunks
48
+ text =<<-EOF
49
+ Atypical teratoid/rhabdoid tumors (AT/RTs)
50
+ are highly aggressive brain
51
+ tumors of early childhood poorly
52
+ responding to therapy.
53
+ EOF
54
+
55
+ NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
56
+ chunk_list = NLP.gdep_chunks(sentence, segment_list)
57
+ chunk_list.each do |segment|
58
+ assert_equal sentence[segment.range], segment
59
+ end
60
+
61
+ assert chunk_list.select{|c| c =~ /rhabdoid/}.first.parts.include? "tumors"
62
+ end
63
+
64
+ end
65
+
66
+ def test_merge_chunks
67
+ text =<<-EOF
68
+ Atypical teratoid/rhabdoid tumors (AT/RTs)
69
+ where found to be like highly aggressive brain
70
+ tumors of early childhood poorly
71
+ responding to therapy.
72
+ EOF
73
+
74
+ NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
75
+ chunk_list = NLP.gdep_chunks(sentence, segment_list)
76
+ new_chunk_list = NLP.merge_vp_chunks(chunk_list)
77
+
78
+ new_chunk_list.each do |segment|
79
+ assert_equal sentence[segment.range], segment
80
+ end
81
+
82
+ assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "found"
83
+ assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "to"
84
+ assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "be"
85
+ end
86
+ end
87
+ end
88
+
@@ -2,8 +2,28 @@ require 'test/unit'
2
2
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
4
 
5
+ require 'rbbt'
6
+ require 'rbbt/util/persistence'
7
+ require 'rbbt/util/tmpfile'
8
+ require 'rbbt/util/log'
9
+ require 'rbbt/corpus/document_repo'
10
+
5
11
  class Test::Unit::TestCase
6
12
  def test_datafile(file)
7
13
  File.join(File.dirname(__FILE__), 'data', file)
8
14
  end
15
+
16
+ def setup
17
+ FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
18
+ Persistence.cachedir = Rbbt.tmp.test.persistence.find :user
19
+ end
20
+
21
+ def teardown
22
+ FileUtils.rm_rf Rbbt.tmp.test.find :user
23
+ TCHash::CONNECTIONS.values.each do |c| c.close end
24
+ TCHash::CONNECTIONS.clear
25
+ DocumentRepo::CONNECTIONS.values.each do |c| c.close end
26
+ DocumentRepo::CONNECTIONS.clear
27
+ end
28
+
9
29
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
9
- - 1
10
- version: 0.2.1
8
+ - 5
9
+ - 0
10
+ version: 0.5.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,8 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-30 00:00:00 +01:00
19
- default_executable:
18
+ date: 2011-07-05 00:00:00 +02:00
19
+ default_executable: get_ppis.rb
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: rbbt-util
@@ -46,10 +46,38 @@ dependencies:
46
46
  version: "0"
47
47
  type: :runtime
48
48
  version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: libxml-ruby
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :runtime
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: json
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ type: :runtime
76
+ version_requirements: *id004
49
77
  description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
50
78
  email: miguel.vazquez@fdi.ucm.es
51
- executables: []
52
-
79
+ executables:
80
+ - get_ppis.rb
53
81
  extensions: []
54
82
 
55
83
  extra_rdoc_files: []
@@ -58,28 +86,64 @@ files:
58
86
  - lib/rbbt/bow/bow.rb
59
87
  - lib/rbbt/bow/dictionary.rb
60
88
  - lib/rbbt/bow/misc.rb
89
+ - lib/rbbt/corpus/corpus.rb
90
+ - lib/rbbt/corpus/document.rb
91
+ - lib/rbbt/corpus/document_repo.rb
92
+ - lib/rbbt/corpus/sources/pubmed.rb
61
93
  - lib/rbbt/ner/NER.rb
62
94
  - lib/rbbt/ner/abner.rb
63
95
  - lib/rbbt/ner/annotations.rb
96
+ - lib/rbbt/ner/annotations/annotated.rb
97
+ - lib/rbbt/ner/annotations/named_entity.rb
98
+ - lib/rbbt/ner/annotations/relations.rb
99
+ - lib/rbbt/ner/annotations/token.rb
100
+ - lib/rbbt/ner/annotations/transformed.rb
64
101
  - lib/rbbt/ner/banner.rb
102
+ - lib/rbbt/ner/chemical_tagger.rb
103
+ - lib/rbbt/ner/ngram_prefix_dictionary.rb
65
104
  - lib/rbbt/ner/oscar3.rb
105
+ - lib/rbbt/ner/oscar4.rb
106
+ - lib/rbbt/ner/patterns.rb
66
107
  - lib/rbbt/ner/regexpNER.rb
108
+ - lib/rbbt/ner/rnorm.rb
109
+ - lib/rbbt/ner/rnorm/cue_index.rb
110
+ - lib/rbbt/ner/rnorm/tokens.rb
67
111
  - lib/rbbt/ner/token_trieNER.rb
112
+ - lib/rbbt/nlp/genia/sentence_splitter.rb
113
+ - lib/rbbt/nlp/nlp.rb
68
114
  - share/install/software/ABNER
69
115
  - share/install/software/BANNER
116
+ - share/install/software/ChemicalTagger
117
+ - share/install/software/Gdep
118
+ - share/install/software/Geniass
70
119
  - share/install/software/OSCAR3
71
- - share/stopwords
120
+ - share/install/software/OSCAR4
121
+ - share/install/software/StanfordParser
122
+ - share/patterns/drug_induce_disease
123
+ - share/rnorm/cue_default
124
+ - share/rnorm/tokens_default
125
+ - share/wordlists/stopwords
126
+ - test/test_helper.rb
72
127
  - test/rbbt/bow/test_bow.rb
73
128
  - test/rbbt/bow/test_dictionary.rb
74
129
  - test/rbbt/bow/test_misc.rb
75
- - test/rbbt/ner/test_NER.rb
130
+ - test/rbbt/ner/test_regexpNER.rb
76
131
  - test/rbbt/ner/test_abner.rb
77
- - test/rbbt/ner/test_annotations.rb
78
132
  - test/rbbt/ner/test_banner.rb
79
- - test/rbbt/ner/test_oscar3.rb
80
- - test/rbbt/ner/test_regexpNER.rb
133
+ - test/rbbt/ner/annotations/test_transformed.rb
134
+ - test/rbbt/ner/annotations/test_named_entity.rb
81
135
  - test/rbbt/ner/test_token_trieNER.rb
82
- - test/test_helper.rb
136
+ - test/rbbt/ner/test_annotations.rb
137
+ - test/rbbt/ner/test_patterns.rb
138
+ - test/rbbt/ner/test_NER.rb
139
+ - test/rbbt/ner/test_rnorm.rb
140
+ - test/rbbt/ner/test_oscar4.rb
141
+ - test/rbbt/ner/test_chemical_tagger.rb
142
+ - test/rbbt/ner/test_ngram_prefix_dictionary.rb
143
+ - test/rbbt/nlp/test_nlp.rb
144
+ - test/rbbt/corpus/test_corpus.rb
145
+ - test/rbbt/corpus/test_document.rb
146
+ - bin/get_ppis.rb
83
147
  has_rdoc: true
84
148
  homepage: http://github.com/mikisvaz/rbbt-util
85
149
  licenses: []
@@ -110,19 +174,28 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
174
  requirements: []
111
175
 
112
176
  rubyforge_project:
113
- rubygems_version: 1.4.2
177
+ rubygems_version: 1.6.2
114
178
  signing_key:
115
179
  specification_version: 3
116
180
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
117
181
  test_files:
182
+ - test/test_helper.rb
118
183
  - test/rbbt/bow/test_bow.rb
119
184
  - test/rbbt/bow/test_dictionary.rb
120
185
  - test/rbbt/bow/test_misc.rb
121
- - test/rbbt/ner/test_NER.rb
186
+ - test/rbbt/ner/test_regexpNER.rb
122
187
  - test/rbbt/ner/test_abner.rb
123
- - test/rbbt/ner/test_annotations.rb
124
188
  - test/rbbt/ner/test_banner.rb
125
- - test/rbbt/ner/test_oscar3.rb
126
- - test/rbbt/ner/test_regexpNER.rb
189
+ - test/rbbt/ner/annotations/test_transformed.rb
190
+ - test/rbbt/ner/annotations/test_named_entity.rb
127
191
  - test/rbbt/ner/test_token_trieNER.rb
128
- - test/test_helper.rb
192
+ - test/rbbt/ner/test_annotations.rb
193
+ - test/rbbt/ner/test_patterns.rb
194
+ - test/rbbt/ner/test_NER.rb
195
+ - test/rbbt/ner/test_rnorm.rb
196
+ - test/rbbt/ner/test_oscar4.rb
197
+ - test/rbbt/ner/test_chemical_tagger.rb
198
+ - test/rbbt/ner/test_ngram_prefix_dictionary.rb
199
+ - test/rbbt/nlp/test_nlp.rb
200
+ - test/rbbt/corpus/test_corpus.rb
201
+ - test/rbbt/corpus/test_document.rb