rbbt 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +17 -0
  3. data/bin/rbbt_config +180 -0
  4. data/install_scripts/classifier/R/classify.R +36 -0
  5. data/install_scripts/classifier/Rakefile +140 -0
  6. data/install_scripts/get_abner.sh +2 -0
  7. data/install_scripts/get_banner.sh +25 -0
  8. data/install_scripts/get_biocreative.sh +72 -0
  9. data/install_scripts/get_crf++.sh +26 -0
  10. data/install_scripts/get_entrez.sh +4 -0
  11. data/install_scripts/get_go.sh +4 -0
  12. data/install_scripts/get_polysearch.sh +8 -0
  13. data/install_scripts/ner/Rakefile +206 -0
  14. data/install_scripts/ner/config/default.rb +52 -0
  15. data/install_scripts/norm/Rakefile +218 -0
  16. data/install_scripts/norm/config/cue_default.rb +10 -0
  17. data/install_scripts/norm/config/tokens_default.rb +79 -0
  18. data/install_scripts/norm/functions.sh +21 -0
  19. data/install_scripts/organisms/Rakefile +25 -0
  20. data/install_scripts/organisms/cgd.Rakefile +84 -0
  21. data/install_scripts/organisms/human.Rakefile +145 -0
  22. data/install_scripts/organisms/mgi.Rakefile +77 -0
  23. data/install_scripts/organisms/pombe.Rakefile +40 -0
  24. data/install_scripts/organisms/rake-include.rb +258 -0
  25. data/install_scripts/organisms/rgd.Rakefile +88 -0
  26. data/install_scripts/organisms/sgd.Rakefile +66 -0
  27. data/install_scripts/organisms/tair.Rakefile +54 -0
  28. data/install_scripts/organisms/worm.Rakefile +109 -0
  29. data/install_scripts/stopwords +1 -0
  30. data/install_scripts/wordlists/consonants +897 -0
  31. data/install_scripts/wordlists/stopwords +1 -0
  32. data/lib/rbbt/bow/bow.rb +87 -0
  33. data/lib/rbbt/bow/classifier.rb +118 -0
  34. data/lib/rbbt/bow/dictionary.rb +218 -0
  35. data/lib/rbbt/ner/abner.rb +34 -0
  36. data/lib/rbbt/ner/banner.rb +73 -0
  37. data/lib/rbbt/ner/regexpNER.rb +62 -0
  38. data/lib/rbbt/ner/rner.rb +227 -0
  39. data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
  40. data/lib/rbbt/ner/rnorm/tokens.rb +213 -0
  41. data/lib/rbbt/ner/rnorm.rb +142 -0
  42. data/lib/rbbt/sources/biocreative.rb +75 -0
  43. data/lib/rbbt/sources/biomart.rb +106 -0
  44. data/lib/rbbt/sources/entrez.rb +211 -0
  45. data/lib/rbbt/sources/go.rb +40 -0
  46. data/lib/rbbt/sources/organism.rb +197 -0
  47. data/lib/rbbt/sources/polysearch.rb +88 -0
  48. data/lib/rbbt/sources/pubmed.rb +111 -0
  49. data/lib/rbbt/util/arrayHash.rb +255 -0
  50. data/lib/rbbt/util/filecache.rb +72 -0
  51. data/lib/rbbt/util/index.rb +69 -0
  52. data/lib/rbbt/util/misc.rb +101 -0
  53. data/lib/rbbt/util/open.rb +207 -0
  54. data/lib/rbbt/util/simpleDSL.rb +87 -0
  55. data/lib/rbbt/util/tmpfile.rb +19 -0
  56. data/lib/rbbt/version.rb +10 -0
  57. data/lib/rbbt.rb +86 -0
  58. data/tasks/install.rake +123 -0
  59. metadata +114 -0
@@ -0,0 +1,897 @@
1
+ bbr
2
+ sts
3
+ nds
4
+ dgm
5
+ ptl
6
+ ptn
7
+ bsc
8
+ ntl
9
+ nth
10
+ ntm
11
+ rbs
12
+ rpt
13
+ bst
14
+ rdl
15
+ nts
16
+ nsh
17
+ pts
18
+ ccl
19
+ rns
20
+ mpl
21
+ ngl
22
+ rds
23
+ ccr
24
+ rst
25
+ ckn
26
+ ctr
27
+ cts
28
+ nct
29
+ cch
30
+ ndr
31
+ dbl
32
+ dcr
33
+ dgr
34
+ dsh
35
+ dsm
36
+ rth
37
+ rdp
38
+ rkb
39
+ rkd
40
+ rkh
41
+ ngm
42
+ ngs
43
+ rkm
44
+ rkp
45
+ rkr
46
+ rks
47
+ rkt
48
+ rld
49
+ rmh
50
+ rms
51
+ rmw
52
+ rsh
53
+ ldb
54
+ ldn
55
+ thf
56
+ ths
57
+ nch
58
+ stl
59
+ tch
60
+ ggl
61
+ ght
62
+ nkl
63
+ stb
64
+ stw
65
+ ngd
66
+ ngf
67
+ ndh
68
+ cht
69
+ nks
70
+ lps
71
+ lds
72
+ tst
73
+ chs
74
+ lst
75
+ rsn
76
+ rlp
77
+ rlw
78
+ ttl
79
+ rls
80
+ spr
81
+ dth
82
+ ldc
83
+ ldf
84
+ ldl
85
+ lfr
86
+ llf
87
+ msb
88
+ ngn
89
+ llp
90
+ lls
91
+ ndb
92
+ ndf
93
+ ndj
94
+ ndl
95
+ ndm
96
+ ndw
97
+ ngb
98
+ nst
99
+ rgr
100
+ ntr
101
+ shb
102
+ shf
103
+ sps
104
+ stf
105
+ thd
106
+ thh
107
+ rdr
108
+ bbl
109
+ lfg
110
+ lfh
111
+ rbl
112
+ rml
113
+ rmt
114
+ rts
115
+ shc
116
+ shd
117
+ ngt
118
+ shr
119
+ shs
120
+ rcr
121
+ rfr
122
+ rpr
123
+ rsk
124
+ rsp
125
+ rwh
126
+ lth
127
+ ghs
128
+ llb
129
+ llk
130
+ lsh
131
+ lts
132
+ stc
133
+ ngh
134
+ stm
135
+ lch
136
+ lwr
137
+ ntn
138
+ lks
139
+ ltm
140
+ ddl
141
+ ffl
142
+ nsc
143
+ lkw
144
+ ltz
145
+ rch
146
+ thr
147
+ ssl
148
+ ssn
149
+ cks
150
+ btr
151
+ npl
152
+ npr
153
+ str
154
+ shl
155
+ nsk
156
+ nsl
157
+ nsm
158
+ nsp
159
+ ssf
160
+ nwh
161
+ pgr
162
+ nwr
163
+ pbr
164
+ rcl
165
+ psh
166
+ pst
167
+ psw
168
+ rpl
169
+ rsc
170
+ rwr
171
+ scl
172
+ ntf
173
+ nfl
174
+ xpl
175
+ ldm
176
+ nfr
177
+ ngr
178
+ mpr
179
+ rnk
180
+ mpt
181
+ nkn
182
+ rnc
183
+ rnp
184
+ rnt
185
+ rtl
186
+ sks
187
+ lft
188
+ scr
189
+ ltr
190
+ mbr
191
+ ggr
192
+ nsw
193
+ ppr
194
+ ttr
195
+ wns
196
+ nbl
197
+ nbr
198
+ ckl
199
+ ncl
200
+ ncr
201
+ rbr
202
+ rfl
203
+ nsv
204
+ nss
205
+ mbl
206
+ chr
207
+ mph
208
+ mps
209
+ fts
210
+ thb
211
+ thc
212
+ thl
213
+ thp
214
+ pkn
215
+ ppl
216
+ chd
217
+ ghn
218
+ nsd
219
+ nsf
220
+ nsg
221
+ ghb
222
+ ghf
223
+ ghg
224
+ ghl
225
+ ndt
226
+ mbn
227
+ mbs
228
+ mbt
229
+ rsd
230
+ thw
231
+ dst
232
+ llg
233
+ llh
234
+ llt
235
+ xtb
236
+ xts
237
+ nkf
238
+ ckh
239
+ ckw
240
+ ctf
241
+ ctl
242
+ lsp
243
+ ffs
244
+ skm
245
+ Tch
246
+ mst
247
+ chn
248
+ rfb
249
+ tsh
250
+ tbr
251
+ ftl
252
+ ftn
253
+ zzl
254
+ rdf
255
+ bgr
256
+ bpl
257
+ bsp
258
+ btl
259
+ ffr
260
+ lph
261
+ ghw
262
+ ckb
263
+ ckc
264
+ ckm
265
+ ckp
266
+ ckr
267
+ rmb
268
+ rnl
269
+ rnn
270
+ ndp
271
+ rkl
272
+ msh
273
+ rsm
274
+ pch
275
+ ffn
276
+ lln
277
+ chl
278
+ sph
279
+ llw
280
+ spl
281
+ Spr
282
+ shn
283
+ wsh
284
+ wst
285
+ ffb
286
+ ftb
287
+ fth
288
+ ftw
289
+ mnl
290
+ rgh
291
+ ndn
292
+ sms
293
+ gns
294
+ gnt
295
+ ntw
296
+ thn
297
+ pdr
298
+ wbl
299
+ wdr
300
+ wfl
301
+ wpl
302
+ gfr
303
+ gnp
304
+ nkh
305
+ xth
306
+ llc
307
+ rpn
308
+ rps
309
+ wls
310
+ psk
311
+ pwr
312
+ rtf
313
+ rtt
314
+ rtw
315
+ rtb
316
+ rtc
317
+ rtn
318
+ phs
319
+ lfl
320
+ rfd
321
+ rfs
322
+ sch
323
+ ptr
324
+ Sch
325
+ rnf
326
+ tsm
327
+ tsw
328
+ ffm
329
+ lpt
330
+ lsk
331
+ lcl
332
+ lpl
333
+ ltb
334
+ ltc
335
+ ltl
336
+ ltp
337
+ ltw
338
+ ndc
339
+ pph
340
+ stn
341
+ ftr
342
+ ghc
343
+ ghh
344
+ ghr
345
+ shm
346
+ stp
347
+ thm
348
+ chm
349
+ tts
350
+ ngw
351
+ phr
352
+ chf
353
+ dbr
354
+ dsk
355
+ psc
356
+ spb
357
+ ddr
358
+ gnm
359
+ lms
360
+ llm
361
+ mpk
362
+ rrh
363
+ nsb
364
+ rtz
365
+ sth
366
+ ssm
367
+ rtr
368
+ std
369
+ stg
370
+ mpn
371
+ tkn
372
+ rph
373
+ phl
374
+ gsk
375
+ gst
376
+ lgr
377
+ ngp
378
+ psq
379
+ nkt
380
+ nns
381
+ rkw
382
+ rsl
383
+ ssb
384
+ ssk
385
+ ssp
386
+ ssw
387
+ wnb
388
+ ntb
389
+ gms
390
+ tcl
391
+ tcr
392
+ tdr
393
+ tfl
394
+ tgr
395
+ tpl
396
+ tsc
397
+ tsk
398
+ tsp
399
+ ddb
400
+ dds
401
+ ffh
402
+ pht
403
+ pth
404
+ rdn
405
+ msk
406
+ ckt
407
+ wsb
408
+ wsc
409
+ wsf
410
+ wsl
411
+ wsm
412
+ wsp
413
+ wsr
414
+ wss
415
+ wsv
416
+ wsw
417
+ wts
418
+ tzs
419
+ dfl
420
+ dsl
421
+ skr
422
+ shv
423
+ ntg
424
+ ntp
425
+ rtg
426
+ lkm
427
+ sbr
428
+ ssh
429
+ sst
430
+ swr
431
+ lsm
432
+ rlb
433
+ tth
434
+ McC
435
+ lpr
436
+ ckj
437
+ dsp
438
+ wbr
439
+ wdl
440
+ mpp
441
+ pfr
442
+ ftf
443
+ Khm
444
+ llj
445
+ ckd
446
+ lbr
447
+ hnn
448
+ hns
449
+ rgl
450
+ rnm
451
+ shw
452
+ nkb
453
+ nkp
454
+ nkw
455
+ nnk
456
+ rtm
457
+ ctm
458
+ ctn
459
+ rct
460
+ xpr
461
+ xtr
462
+ mpb
463
+ mns
464
+ rnb
465
+ mdr
466
+ lpf
467
+ lpm
468
+ ldr
469
+ nck
470
+ ndq
471
+ gsh
472
+ ndg
473
+ ndk
474
+ rdb
475
+ rdc
476
+ rdh
477
+ rdt
478
+ rdw
479
+ rmf
480
+ wks
481
+ dch
482
+ dph
483
+ msm
484
+ wth
485
+ ldh
486
+ lfb
487
+ lfp
488
+ lfw
489
+ ndd
490
+ ssr
491
+ ngk
492
+ nkg
493
+ rlf
494
+ rlh
495
+ ssc
496
+ lcr
497
+ gwr
498
+ tsb
499
+ rsw
500
+ rsq
501
+ xgl
502
+ ncs
503
+ wch
504
+ lkl
505
+ lkt
506
+ tpr
507
+ ldw
508
+ lmd
509
+ rmn
510
+ shp
511
+ dgl
512
+ lkn
513
+ rrs
514
+ xch
515
+ xcl
516
+ xcr
517
+ lfs
518
+ nsn
519
+ lmm
520
+ nkm
521
+ mpm
522
+ mbb
523
+ mbw
524
+ sdr
525
+ bbs
526
+ cst
527
+ ggb
528
+ ggc
529
+ ggh
530
+ ggn
531
+ ggp
532
+ ggs
533
+ msd
534
+ rkn
535
+ vsk
536
+ btf
537
+ bts
538
+ wnc
539
+ wnf
540
+ wng
541
+ wnh
542
+ wnp
543
+ wnr
544
+ wnt
545
+ wnw
546
+ ctw
547
+ gtr
548
+ scs
549
+ sgr
550
+ skw
551
+ phn
552
+ wsk
553
+ xsw
554
+ spn
555
+ wds
556
+ rnw
557
+ ntd
558
+ chw
559
+ ckf
560
+ gwh
561
+ Chr
562
+ mfl
563
+ msc
564
+ msp
565
+ mpd
566
+ bdr
567
+ lml
568
+ lmn
569
+ mpf
570
+ mpg
571
+ skl
572
+ skn
573
+ dcl
574
+ dsw
575
+ ddh
576
+ lbs
577
+ lkh
578
+ lld
579
+ llr
580
+ kcl
581
+ ksh
582
+ kst
583
+ hms
584
+ dfr
585
+ kth
586
+ tht
587
+ bsl
588
+ rdm
589
+ mbp
590
+ wgr
591
+ rck
592
+ nkr
593
+ chc
594
+ chh
595
+ fst
596
+ wkw
597
+ ckg
598
+ sht
599
+ rcs
600
+ rmc
601
+ rmp
602
+ fghbstr
603
+ rdpl
604
+ rdsw
605
+ rksh
606
+ rldl
607
+ rlds
608
+ rldw
609
+ rthl
610
+ rthw
611
+ chts
612
+ rdst
613
+ ngst
614
+ dths
615
+ ndbr
616
+ ndsc
617
+ ndsh
618
+ ndsl
619
+ ndst
620
+ ndsw
621
+ ngsp
622
+ tchc
623
+ tchd
624
+ thdr
625
+ thst
626
+ rmth
627
+ shcl
628
+ shst
629
+ tchb
630
+ tchf
631
+ tchm
632
+ tchs
633
+ tcht
634
+ ngth
635
+ rstr
636
+ ghtl
637
+ ghts
638
+ llkn
639
+ llsp
640
+ lshm
641
+ stch
642
+ lksw
643
+ llfl
644
+ nstr
645
+ nsch
646
+ nscr
647
+ pstr
648
+ rnst
649
+ lfth
650
+ wnst
651
+ nchr
652
+ nspl
653
+ nssh
654
+ ckst
655
+ mphs
656
+ thbr
657
+ rchl
658
+ chst
659
+ wnsc
660
+ wnsf
661
+ wnsh
662
+ wnsm
663
+ wnsp
664
+ nsgr
665
+ ghbr
666
+ ghtf
667
+ ndth
668
+ mbsc
669
+ ghtr
670
+ ghtw
671
+ mbst
672
+ mptr
673
+ mpts
674
+ nths
675
+ nksg
676
+ tchg
677
+ rdsm
678
+ bscr
679
+ rscr
680
+ ckbr
681
+ rdsh
682
+ rths
683
+ rtsm
684
+ rtsw
685
+ ndpr
686
+ ndtr
687
+ ngsh
688
+ tchp
689
+ rksk
690
+ rpsh
691
+ llsh
692
+ rtfr
693
+ rtsl
694
+ ckpr
695
+ rtbr
696
+ rtst
697
+ lchr
698
+ schn
699
+ Schw
700
+ ptwr
701
+ mstr
702
+ ltsh
703
+ ndbl
704
+ nskr
705
+ rsch
706
+ ghsh
707
+ ndsm
708
+ stpr
709
+ ckcl
710
+ thms
711
+ cksh
712
+ ghth
713
+ ghtn
714
+ mptl
715
+ stgr
716
+ stsc
717
+ nkst
718
+ ncts
719
+ llst
720
+ ttsb
721
+ nthr
722
+ tchw
723
+ wnbr
724
+ rthr
725
+ ntbr
726
+ mphl
727
+ rchm
728
+ rsts
729
+ tspr
730
+ tstr
731
+ ffsh
732
+ ffsp
733
+ ffst
734
+ ldsm
735
+ phth
736
+ wsfl
737
+ wspr
738
+ wssh
739
+ wsst
740
+ tzsc
741
+ ghtc
742
+ ghtd
743
+ ghtg
744
+ ghtm
745
+ ghtt
746
+ rchg
747
+ nthl
748
+ thpr
749
+ lksh
750
+ llwh
751
+ dstr
752
+ dsts
753
+ rksm
754
+ rshm
755
+ tchl
756
+ ncht
757
+ lstr
758
+ nspr
759
+ cksm
760
+ ghtb
761
+ mbsk
762
+ mpsh
763
+ rksp
764
+ tchk
765
+ sthm
766
+ mphr
767
+ nchb
768
+ ntsm
769
+ lmsm
770
+ nchm
771
+ ghsc
772
+ nckl
773
+ tchh
774
+ ndcl
775
+ ndwr
776
+ rdsc
777
+ rshl
778
+ rshn
779
+ lthf
780
+ rtth
781
+ wths
782
+ rspr
783
+ ndch
784
+ rshw
785
+ rlfr
786
+ ssbl
787
+ llbl
788
+ ngpl
789
+ rthc
790
+ cksb
791
+ nchw
792
+ ntst
793
+ lmst
794
+ rstb
795
+ rsth
796
+ rmst
797
+ ldsp
798
+ rpts
799
+ nshr
800
+ rthb
801
+ rspl
802
+ rthm
803
+ rthq
804
+ ggpl
805
+ ggsh
806
+ ghtp
807
+ wngr
808
+ wntr
809
+ wstr
810
+ nctl
811
+ rtsp
812
+ tsch
813
+ pths
814
+ thbl
815
+ tthr
816
+ chsh
817
+ rtsh
818
+ ftsm
819
+ nksh
820
+ wlsp
821
+ sscr
822
+ ssbr
823
+ ssch
824
+ rksc
825
+ rnbr
826
+ rnfl
827
+ lths
828
+ cksc
829
+ rchw
830
+ mscr
831
+ rtwh
832
+ shwh
833
+ lfsk
834
+ mpgr
835
+ nksm
836
+ cksk
837
+ ckwh
838
+ llfr
839
+ kthr
840
+ stpl
841
+ stst
842
+ mbpr
843
+ mbsh
844
+ rthp
845
+ rthd
846
+ dspr
847
+ schw
848
+ ckdr
849
+ ckgr
850
+ cksl
851
+ cksp
852
+ cktr
853
+ llpl
854
+ shtr
855
+ rchb
856
+ rchd
857
+ rcht
858
+ rmchrldsh
859
+ rthwh
860
+ chtsm
861
+ ndscr
862
+ ngspr
863
+ tchcr
864
+ tchst
865
+ ngths
866
+ llspr
867
+ ndths
868
+ mbscr
869
+ tchbl
870
+ ndthr
871
+ rtswr
872
+ stscr
873
+ ffspr
874
+ rdstr
875
+ tzsch
876
+ ghtcl
877
+ ghtdr
878
+ ghtsh
879
+ ghtst
880
+ ngthw
881
+ ckstr
882
+ ghsch
883
+ rdscr
884
+ rtstr
885
+ rtthr
886
+ lmstr
887
+ rthsh
888
+ wnstr
889
+ tschm
890
+ rkscr
891
+ tchph
892
+ rscht
893
+ ststr
894
+ rthpl
895
+ ckscr
896
+ rmstrtchstr
897
+ tchphr