rbbt-text 0.2.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/bin/get_ppis.rb +52 -0
  2. data/lib/rbbt/bow/dictionary.rb +9 -9
  3. data/lib/rbbt/bow/misc.rb +86 -2
  4. data/lib/rbbt/corpus/corpus.rb +55 -0
  5. data/lib/rbbt/corpus/document.rb +289 -0
  6. data/lib/rbbt/corpus/document_repo.rb +115 -0
  7. data/lib/rbbt/corpus/sources/pubmed.rb +26 -0
  8. data/lib/rbbt/ner/NER.rb +7 -5
  9. data/lib/rbbt/ner/abner.rb +13 -2
  10. data/lib/rbbt/ner/annotations.rb +182 -51
  11. data/lib/rbbt/ner/annotations/annotated.rb +15 -0
  12. data/lib/rbbt/ner/annotations/named_entity.rb +37 -0
  13. data/lib/rbbt/ner/annotations/relations.rb +25 -0
  14. data/lib/rbbt/ner/annotations/token.rb +28 -0
  15. data/lib/rbbt/ner/annotations/transformed.rb +170 -0
  16. data/lib/rbbt/ner/banner.rb +8 -5
  17. data/lib/rbbt/ner/chemical_tagger.rb +34 -0
  18. data/lib/rbbt/ner/ngram_prefix_dictionary.rb +136 -0
  19. data/lib/rbbt/ner/oscar3.rb +1 -1
  20. data/lib/rbbt/ner/oscar4.rb +41 -0
  21. data/lib/rbbt/ner/patterns.rb +132 -0
  22. data/lib/rbbt/ner/rnorm.rb +141 -0
  23. data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
  24. data/lib/rbbt/ner/rnorm/tokens.rb +218 -0
  25. data/lib/rbbt/ner/token_trieNER.rb +185 -51
  26. data/lib/rbbt/nlp/genia/sentence_splitter.rb +214 -0
  27. data/lib/rbbt/nlp/nlp.rb +235 -0
  28. data/share/install/software/ABNER +0 -4
  29. data/share/install/software/ChemicalTagger +81 -0
  30. data/share/install/software/Gdep +115 -0
  31. data/share/install/software/Geniass +118 -0
  32. data/share/install/software/OSCAR4 +16 -0
  33. data/share/install/software/StanfordParser +15 -0
  34. data/share/patterns/drug_induce_disease +22 -0
  35. data/share/rnorm/cue_default +10 -0
  36. data/share/rnorm/tokens_default +86 -0
  37. data/share/{stopwords → wordlists/stopwords} +0 -0
  38. data/test/rbbt/bow/test_bow.rb +1 -1
  39. data/test/rbbt/bow/test_dictionary.rb +1 -1
  40. data/test/rbbt/bow/test_misc.rb +1 -1
  41. data/test/rbbt/corpus/test_corpus.rb +99 -0
  42. data/test/rbbt/corpus/test_document.rb +222 -0
  43. data/test/rbbt/ner/annotations/test_named_entity.rb +14 -0
  44. data/test/rbbt/ner/annotations/test_transformed.rb +175 -0
  45. data/test/rbbt/ner/test_abner.rb +1 -1
  46. data/test/rbbt/ner/test_annotations.rb +64 -2
  47. data/test/rbbt/ner/test_banner.rb +1 -1
  48. data/test/rbbt/ner/test_chemical_tagger.rb +56 -0
  49. data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +20 -0
  50. data/test/rbbt/ner/{test_oscar3.rb → test_oscar4.rb} +12 -13
  51. data/test/rbbt/ner/test_patterns.rb +66 -0
  52. data/test/rbbt/ner/test_regexpNER.rb +1 -1
  53. data/test/rbbt/ner/test_rnorm.rb +47 -0
  54. data/test/rbbt/ner/test_token_trieNER.rb +60 -35
  55. data/test/rbbt/nlp/test_nlp.rb +88 -0
  56. data/test/test_helper.rb +20 -0
  57. metadata +93 -20
@@ -1,4 +1,4 @@
1
- require File.dirname(__FILE__) + '/../../test_helper'
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
2
  require 'rbbt/ner/regexpNER'
3
3
 
4
4
  class TestRegExpNER < Test::Unit::TestCase
@@ -0,0 +1,47 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/ner/rnorm'
3
+ require 'rbbt/util/open'
4
+ require 'rbbt/util/tmpfile'
5
+ require 'test/unit'
6
+
7
+ class TestRNORM < Test::Unit::TestCase
8
+
9
+ def setup
10
+ tmp = TmpFile.tmp_file("test-rnorm-")
11
+ lexicon =<<-EOT
12
+ S000000029 YAL031C GIP4 FUN21
13
+ S000000030 YAL032C PRP45 FUN20
14
+ S000000031 YAL033W POP5 FUN53
15
+ S000000374 YBR170C NPL4 HRD4
16
+ S000000375 GENE1 BBB CCC
17
+ S000000376 AAA GENE1 DDD
18
+ EOT
19
+
20
+ Open.write(tmp, lexicon)
21
+
22
+ @norm = Normalizer.new(tmp)
23
+ FileUtils.rm tmp
24
+ end
25
+
26
+ def test_match
27
+ assert_equal(["S000000029"], @norm.match("FUN21"))
28
+ assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
29
+ assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
30
+ assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
31
+ assert_equal([], @norm.match("GER4"))
32
+
33
+ @norm.match("FUN21")
34
+ end
35
+
36
+ def test_select
37
+ assert_equal(["S000000029"], @norm.select(["S000000030", "S000000029", "S000000031"],"FUN 21"))
38
+ end
39
+
40
+ def test_resolve
41
+ assert_equal(["S000000029"], @norm.resolve("FUN 21"))
42
+ end
43
+
44
+ def test_order
45
+ assert_equal(["S000000375"], @norm.resolve("GENE1"))
46
+ end
47
+ end
@@ -10,16 +10,16 @@ class TestTokenTrieNER < Test::Unit::TestCase
10
10
  assert_equal 10, TokenTrieNER.tokenize('123456789 12345').last.offset
11
11
  assert_equal 0, TokenTrieNER.tokenize('123456789 12345').first.offset
12
12
 
13
-
14
13
  text = '123456789 12345'
15
14
  assert_equal '12345', text[TokenTrieNER.tokenize('123456789 12345').last.range]
16
15
  end
17
16
 
18
17
  def test_merge
19
18
  tokens = %w(a b c)
20
- index = {'a' => {'b' => {'c' => {:END => [TokenTrieNER::Code.new 'CODE']}}}}
19
+ tokens.extend TokenTrieNER::EnumeratedArray
20
+ index = {'a' => {'b' => {'c' => {:END => [TokenTrieNER::Code.new('CODE')]}}}}
21
21
 
22
- assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.value
22
+ assert_equal 'CODE', TokenTrieNER.merge({}, TokenTrieNER.index_for_tokens(tokens, 'CODE'))['a']['b']['c'][:END].first.code
23
23
  end
24
24
 
25
25
  def test_process
@@ -30,7 +30,7 @@ C2;11;22;3 3;bb
30
30
 
31
31
  TmpFile.with_file(lexicon) do |file|
32
32
 
33
- index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))
33
+ index = TokenTrieNER.process({}, TSV.new(file, :flat, :sep => ';'))
34
34
 
35
35
  assert_equal ['AA', 'aa', 'bb', '11', '22', '3'].sort, index.keys.sort
36
36
  assert_equal [:END], index['aa'].keys
@@ -47,20 +47,20 @@ C2;11;22;3 3;bb
47
47
 
48
48
 
49
49
  TmpFile.with_file(lexicon) do |file|
50
- index = TokenTrieNER.process(TSV.new(file, :sep => ';', :flatten => true))
50
+ index = TokenTrieNER.process({}, TSV.new(file, :sep => ';', :type => :flat ))
51
51
 
52
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).first.collect{|c| c.value}.include? 'C1'
53
- assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), false).last
52
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C1'
53
+ assert_equal %w(aa), TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), false).last
54
54
 
55
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf'), true).first.collect{|c| c.value}.include? 'C1'
55
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('aa asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
56
56
 
57
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).first.collect{|c| c.value}.include? 'C1'
58
- assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), true).last
57
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).first.collect{|c| c.code}.include? 'C1'
58
+ assert_equal %w(bb b), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), true).last
59
59
 
60
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).first.collect{|c| c.value}.include? 'C2'
61
- assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf'), false).last
60
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
61
+ assert_equal %w(bb), TokenTrieNER.find(index, TokenTrieNER.tokenize('bb b asdf').extend(TokenTrieNER::EnumeratedArray), false).last
62
62
 
63
- assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf'), false).first.collect{|c| c.value}.include? 'C2'
63
+ assert TokenTrieNER.find(index, TokenTrieNER.tokenize('bb asdf').extend(TokenTrieNER::EnumeratedArray), false).first.collect{|c| c.code}.include? 'C2'
64
64
  end
65
65
  end
66
66
 
@@ -71,42 +71,67 @@ C2;11;22;3 3;bb
71
71
  EOF
72
72
 
73
73
  TmpFile.with_file(lexicon) do |file|
74
- index = TokenTrieNER.new(file, nil, :sep => ';')
74
+ index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'))
75
75
 
76
+ index.match(' asdfa dsf asdf aa asdfasdf ')
76
77
  assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
77
78
  end
78
79
  end
79
80
 
80
- def _test_polysearch_long_match
81
- begin
82
- require 'rbbt/sources/polysearch'
83
- rescue
84
- puts "Polysearch is not available. Some test have not ran."
85
- assert true
86
- return
81
+ def test_slack
82
+ lexicon =<<-EOF
83
+ C1;aa;AA;bb cc cc b
84
+ C2;11;22;3 3;bb
85
+ EOF
86
+
87
+ TmpFile.with_file(lexicon) do |file|
88
+ index = TokenTrieNER.new({})
89
+ index.slack = Proc.new{|t| t =~ /^c*$/}
90
+
91
+ index.merge TSV.new(file, :flat, :sep => ';')
92
+
93
+ assert index.match(' aaaaa 3 cc 3').select{|m| m.code.include? 'C2'}.any?
94
+ assert index.match(' bb cc b').select{|m| m.code.include? 'C1'}.any?
95
+ assert index.match(' bb b').select{|m| m.code.include? 'C1'}.any?
87
96
  end
97
+ end
88
98
 
89
- sentence = "mammary and pituitary neoplasms as well as other drug-related mammary/reproductive tissue alterations in females were considered"
99
+ def test_own_tokens
100
+ lexicon =<<-EOF
101
+ C1;aa;AA;bb cc cc b
102
+ C2;11;22;3 3;bb
103
+ EOF
90
104
 
91
- index = TokenTrieNER.new Rbbt.find_datafile('organ')
92
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'OR00063'
105
+ TmpFile.with_file(lexicon) do |file|
106
+ index = TokenTrieNER.new({})
107
+ index.slack = Proc.new{|t| t =~ /^c*$/}
93
108
 
94
- index = TokenTrieNER.new Rbbt.find_datafile('disease')
95
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
109
+ index.merge TSV.new(file, :flat, :sep => ';')
96
110
 
97
- index = TokenTrieNER.new Rbbt.find_datafile('disease'), Rbbt.find_datafile('organ')
98
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
111
+ assert index.match(Token.tokenize('3 cc 3')).select{|m| m.code.include? 'C2'}.any?
112
+ end
113
+ end
99
114
 
100
- index = TokenTrieNER.new Rbbt.find_datafile('disease'), Rbbt.find_datafile('organ')
101
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
115
+ def test_proc_index
116
+ index = TokenTrieNER.new({})
117
+ index.merge({ "aa" => {:PROCS => {Proc.new{|c| c == 'c'} => {:END => [TokenTrieNER::Code.new(:entity, :C1)]}}}})
102
118
 
103
- index = TokenTrieNER.new Rbbt.find_datafile('organ')
104
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'OR00063'
105
- index.merge Rbbt.find_datafile('disease')
106
- assert ! index.match(sentence).collect{|m| m.code}.flatten.include?('OR00063')
107
- assert index.match(sentence).collect{|m| m.code}.flatten.include? 'DID44386'
119
+ assert index.match(Token.tokenize('3 cc 3 aa c ddd')).select{|m| m.code.include? :entity}.any?
108
120
  end
109
121
 
122
+ def test_persistence
123
+ lexicon =<<-EOF
124
+ C1;aa;AA;bb b
125
+ C2;11;22;3 3;bb
126
+ EOF
127
+
128
+ TmpFile.with_file(lexicon) do |file|
129
+ index = TokenTrieNER.new("test", TSV.new(file, :flat, :sep => ';'), :persistence => true)
130
+
131
+ index.match(' asdfa dsf asdf aa asdfasdf ')
132
+ assert index.match(' asdfa dsf asdf aa asdfasdf ').select{|m| m.code.include? 'C1'}.any?
133
+ end
134
+ end
110
135
 
111
136
  end
112
137
 
@@ -0,0 +1,88 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/nlp/nlp'
3
+
4
+ text=<<-EOF
5
+ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
6
+ of early childhood poorly responding to therapy. The majority of cases show
7
+ inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
8
+ triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
9
+ report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
10
+ retained SMARCB1 staining on immunohistochemistry and lacked genetic
11
+ alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
12
+ another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
13
+ (BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
14
+ findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
15
+ AT/RT and the usefulness of antibodies directed against SMARCA4 in this
16
+ diagnostic setting.
17
+ EOF
18
+
19
+ class TestClass < Test::Unit::TestCase
20
+
21
+ def test_sentences
22
+ text =<<-EOF
23
+ This is a
24
+ sentence. This is
25
+ another sentence.
26
+ EOF
27
+
28
+ assert_equal 2, NLP.geniass_sentence_splitter(text).length
29
+ assert_equal "This is a \nsentence. ", NLP.geniass_sentence_splitter(text).first
30
+ end
31
+
32
+ def test_gdep_parse_sentences
33
+ text =<<-EOF
34
+ Atypical teratoid/rhabdoid tumors (AT/RTs)
35
+ are highly aggressive brain
36
+ tumors of early childhood poorly
37
+ responding to therapy.
38
+ EOF
39
+
40
+ NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
41
+ segment_list.each do |segment|
42
+ assert_equal sentence[segment.range], segment
43
+ end
44
+ end
45
+ end
46
+
47
+ def test_gdep_chunks
48
+ text =<<-EOF
49
+ Atypical teratoid/rhabdoid tumors (AT/RTs)
50
+ are highly aggressive brain
51
+ tumors of early childhood poorly
52
+ responding to therapy.
53
+ EOF
54
+
55
+ NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
56
+ chunk_list = NLP.gdep_chunks(sentence, segment_list)
57
+ chunk_list.each do |segment|
58
+ assert_equal sentence[segment.range], segment
59
+ end
60
+
61
+ assert chunk_list.select{|c| c =~ /rhabdoid/}.first.parts.include? "tumors"
62
+ end
63
+
64
+ end
65
+
66
+ def test_merge_chunks
67
+ text =<<-EOF
68
+ Atypical teratoid/rhabdoid tumors (AT/RTs)
69
+ where found to be like highly aggressive brain
70
+ tumors of early childhood poorly
71
+ responding to therapy.
72
+ EOF
73
+
74
+ NLP.gdep_parse_sentences_extension([text, text]).zip([text,text]).each do |segment_list, sentence|
75
+ chunk_list = NLP.gdep_chunks(sentence, segment_list)
76
+ new_chunk_list = NLP.merge_vp_chunks(chunk_list)
77
+
78
+ new_chunk_list.each do |segment|
79
+ assert_equal sentence[segment.range], segment
80
+ end
81
+
82
+ assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "found"
83
+ assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "to"
84
+ assert new_chunk_list.select{|c| c.type == "VP"}.first.parts.include? "be"
85
+ end
86
+ end
87
+ end
88
+
@@ -2,8 +2,28 @@ require 'test/unit'
2
2
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
3
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
4
 
5
+ require 'rbbt'
6
+ require 'rbbt/util/persistence'
7
+ require 'rbbt/util/tmpfile'
8
+ require 'rbbt/util/log'
9
+ require 'rbbt/corpus/document_repo'
10
+
5
11
  class Test::Unit::TestCase
6
12
  def test_datafile(file)
7
13
  File.join(File.dirname(__FILE__), 'data', file)
8
14
  end
15
+
16
+ def setup
17
+ FileUtils.mkdir_p Rbbt.tmp.test.persistence.find(:user)
18
+ Persistence.cachedir = Rbbt.tmp.test.persistence.find :user
19
+ end
20
+
21
+ def teardown
22
+ FileUtils.rm_rf Rbbt.tmp.test.find :user
23
+ TCHash::CONNECTIONS.values.each do |c| c.close end
24
+ TCHash::CONNECTIONS.clear
25
+ DocumentRepo::CONNECTIONS.values.each do |c| c.close end
26
+ DocumentRepo::CONNECTIONS.clear
27
+ end
28
+
9
29
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 2
9
- - 1
10
- version: 0.2.1
8
+ - 5
9
+ - 0
10
+ version: 0.5.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,8 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-30 00:00:00 +01:00
19
- default_executable:
18
+ date: 2011-07-05 00:00:00 +02:00
19
+ default_executable: get_ppis.rb
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: rbbt-util
@@ -46,10 +46,38 @@ dependencies:
46
46
  version: "0"
47
47
  type: :runtime
48
48
  version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: libxml-ruby
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :runtime
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: json
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ type: :runtime
76
+ version_requirements: *id004
49
77
  description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
50
78
  email: miguel.vazquez@fdi.ucm.es
51
- executables: []
52
-
79
+ executables:
80
+ - get_ppis.rb
53
81
  extensions: []
54
82
 
55
83
  extra_rdoc_files: []
@@ -58,28 +86,64 @@ files:
58
86
  - lib/rbbt/bow/bow.rb
59
87
  - lib/rbbt/bow/dictionary.rb
60
88
  - lib/rbbt/bow/misc.rb
89
+ - lib/rbbt/corpus/corpus.rb
90
+ - lib/rbbt/corpus/document.rb
91
+ - lib/rbbt/corpus/document_repo.rb
92
+ - lib/rbbt/corpus/sources/pubmed.rb
61
93
  - lib/rbbt/ner/NER.rb
62
94
  - lib/rbbt/ner/abner.rb
63
95
  - lib/rbbt/ner/annotations.rb
96
+ - lib/rbbt/ner/annotations/annotated.rb
97
+ - lib/rbbt/ner/annotations/named_entity.rb
98
+ - lib/rbbt/ner/annotations/relations.rb
99
+ - lib/rbbt/ner/annotations/token.rb
100
+ - lib/rbbt/ner/annotations/transformed.rb
64
101
  - lib/rbbt/ner/banner.rb
102
+ - lib/rbbt/ner/chemical_tagger.rb
103
+ - lib/rbbt/ner/ngram_prefix_dictionary.rb
65
104
  - lib/rbbt/ner/oscar3.rb
105
+ - lib/rbbt/ner/oscar4.rb
106
+ - lib/rbbt/ner/patterns.rb
66
107
  - lib/rbbt/ner/regexpNER.rb
108
+ - lib/rbbt/ner/rnorm.rb
109
+ - lib/rbbt/ner/rnorm/cue_index.rb
110
+ - lib/rbbt/ner/rnorm/tokens.rb
67
111
  - lib/rbbt/ner/token_trieNER.rb
112
+ - lib/rbbt/nlp/genia/sentence_splitter.rb
113
+ - lib/rbbt/nlp/nlp.rb
68
114
  - share/install/software/ABNER
69
115
  - share/install/software/BANNER
116
+ - share/install/software/ChemicalTagger
117
+ - share/install/software/Gdep
118
+ - share/install/software/Geniass
70
119
  - share/install/software/OSCAR3
71
- - share/stopwords
120
+ - share/install/software/OSCAR4
121
+ - share/install/software/StanfordParser
122
+ - share/patterns/drug_induce_disease
123
+ - share/rnorm/cue_default
124
+ - share/rnorm/tokens_default
125
+ - share/wordlists/stopwords
126
+ - test/test_helper.rb
72
127
  - test/rbbt/bow/test_bow.rb
73
128
  - test/rbbt/bow/test_dictionary.rb
74
129
  - test/rbbt/bow/test_misc.rb
75
- - test/rbbt/ner/test_NER.rb
130
+ - test/rbbt/ner/test_regexpNER.rb
76
131
  - test/rbbt/ner/test_abner.rb
77
- - test/rbbt/ner/test_annotations.rb
78
132
  - test/rbbt/ner/test_banner.rb
79
- - test/rbbt/ner/test_oscar3.rb
80
- - test/rbbt/ner/test_regexpNER.rb
133
+ - test/rbbt/ner/annotations/test_transformed.rb
134
+ - test/rbbt/ner/annotations/test_named_entity.rb
81
135
  - test/rbbt/ner/test_token_trieNER.rb
82
- - test/test_helper.rb
136
+ - test/rbbt/ner/test_annotations.rb
137
+ - test/rbbt/ner/test_patterns.rb
138
+ - test/rbbt/ner/test_NER.rb
139
+ - test/rbbt/ner/test_rnorm.rb
140
+ - test/rbbt/ner/test_oscar4.rb
141
+ - test/rbbt/ner/test_chemical_tagger.rb
142
+ - test/rbbt/ner/test_ngram_prefix_dictionary.rb
143
+ - test/rbbt/nlp/test_nlp.rb
144
+ - test/rbbt/corpus/test_corpus.rb
145
+ - test/rbbt/corpus/test_document.rb
146
+ - bin/get_ppis.rb
83
147
  has_rdoc: true
84
148
  homepage: http://github.com/mikisvaz/rbbt-util
85
149
  licenses: []
@@ -110,19 +174,28 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
174
  requirements: []
111
175
 
112
176
  rubyforge_project:
113
- rubygems_version: 1.4.2
177
+ rubygems_version: 1.6.2
114
178
  signing_key:
115
179
  specification_version: 3
116
180
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
117
181
  test_files:
182
+ - test/test_helper.rb
118
183
  - test/rbbt/bow/test_bow.rb
119
184
  - test/rbbt/bow/test_dictionary.rb
120
185
  - test/rbbt/bow/test_misc.rb
121
- - test/rbbt/ner/test_NER.rb
186
+ - test/rbbt/ner/test_regexpNER.rb
122
187
  - test/rbbt/ner/test_abner.rb
123
- - test/rbbt/ner/test_annotations.rb
124
188
  - test/rbbt/ner/test_banner.rb
125
- - test/rbbt/ner/test_oscar3.rb
126
- - test/rbbt/ner/test_regexpNER.rb
189
+ - test/rbbt/ner/annotations/test_transformed.rb
190
+ - test/rbbt/ner/annotations/test_named_entity.rb
127
191
  - test/rbbt/ner/test_token_trieNER.rb
128
- - test/test_helper.rb
192
+ - test/rbbt/ner/test_annotations.rb
193
+ - test/rbbt/ner/test_patterns.rb
194
+ - test/rbbt/ner/test_NER.rb
195
+ - test/rbbt/ner/test_rnorm.rb
196
+ - test/rbbt/ner/test_oscar4.rb
197
+ - test/rbbt/ner/test_chemical_tagger.rb
198
+ - test/rbbt/ner/test_ngram_prefix_dictionary.rb
199
+ - test/rbbt/nlp/test_nlp.rb
200
+ - test/rbbt/corpus/test_corpus.rb
201
+ - test/rbbt/corpus/test_document.rb