rbbt 1.1.7 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60)
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +72 -136
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -246
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -145
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -79
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Rakefile +0 -43
  22. data/install_scripts/organisms/cgd.Rakefile +0 -84
  23. data/install_scripts/organisms/human.Rakefile +0 -145
  24. data/install_scripts/organisms/mgi.Rakefile +0 -77
  25. data/install_scripts/organisms/pombe.Rakefile +0 -40
  26. data/install_scripts/organisms/rake-include.rb +0 -258
  27. data/install_scripts/organisms/rgd.Rakefile +0 -88
  28. data/install_scripts/organisms/sgd.Rakefile +0 -66
  29. data/install_scripts/organisms/tair.Rakefile +0 -54
  30. data/install_scripts/organisms/worm.Rakefile +0 -109
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -86
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -40
  49. data/lib/rbbt/sources/organism.rb +0 -245
  50. data/lib/rbbt/sources/polysearch.rb +0 -117
  51. data/lib/rbbt/sources/pubmed.rb +0 -111
  52. data/lib/rbbt/util/arrayHash.rb +0 -255
  53. data/lib/rbbt/util/filecache.rb +0 -72
  54. data/lib/rbbt/util/index.rb +0 -47
  55. data/lib/rbbt/util/misc.rb +0 -106
  56. data/lib/rbbt/util/open.rb +0 -235
  57. data/lib/rbbt/util/rake.rb +0 -183
  58. data/lib/rbbt/util/simpleDSL.rb +0 -87
  59. data/lib/rbbt/util/tmpfile.rb +0 -19
  60. data/tasks/install.rake +0 -124
@@ -1,145 +0,0 @@
1
- require __FILE__.sub(/[^\/]*$/,'') + '../rake-include'
2
-
3
- $name = "Homo sapiens"
4
-
5
-
6
- $native_id = "Entrez Gene ID"
7
-
8
- $entrez2native = {
9
- :tax => 9606,
10
- :fix => nil,
11
- :check => proc{|code| false},
12
- }
13
-
14
- $lexicon = {
15
- :biomart => {
16
- :database => 'hsapiens_gene_ensembl',
17
- :main => ['Entrez Gene ID' , "entrezgene"],
18
- :extra => [
19
- [ 'Associated Gene Name' , "external_gene_id"],
20
- [ 'HGNC symbol', "hgnc_symbol" ],
21
- [ 'HGNC automatic gene name', "hgnc_automatic_gene_name" ],
22
- [ 'HGNC curated gene name ', "hgnc_curated_gene_name" ],
23
- ],
24
- }
25
-
26
- }
27
-
28
- $identifiers = {
29
- :biomart => {
30
- :database => 'hsapiens_gene_ensembl',
31
- :main => ['Entrez Gene ID' , "entrezgene"],
32
- :extra => [
33
- [ 'Ensembl Gene ID', "ensembl_gene_id" ],
34
- [ 'Ensembl Protein ID', "ensembl_peptide_id" ],
35
- [ 'Associated Gene Name', "external_gene_id" ],
36
- [ 'CCDS ID', "ccds" ],
37
- [ 'Protein ID', "protein_id" ],
38
- [ 'RefSeq Protein ID', "refseq_peptide" ],
39
- [ 'Unigene ID', "unigene" ],
40
- [ 'UniProt/SwissProt ID', "uniprot_swissprot" ],
41
- [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
42
- [ 'HGNC ID', "hgnc_id", 'HGNC'],
43
- ['EMBL (Genbank) ID' , "embl"] ,
44
-
45
- # Affymetrix
46
- [ 'AFFY HC G110', 'affy_hc_g110' ],
47
- [ 'AFFY HG FOCUS', 'affy_hg_focus' ],
48
- [ 'AFFY HG U133-PLUS-2', 'affy_hg_u133_plus_2' ],
49
- [ 'AFFY HG U133A_2', 'affy_hg_u133a_2' ],
50
- [ 'AFFY HG U133A', 'affy_hg_u133a' ],
51
- [ 'AFFY HG U133B', 'affy_hg_u133b' ],
52
- [ 'AFFY HG U95AV2', 'affy_hg_u95av2' ],
53
- [ 'AFFY HG U95B', 'affy_hg_u95b' ],
54
- [ 'AFFY HG U95C', 'affy_hg_u95c' ],
55
- [ 'AFFY HG U95D', 'affy_hg_u95d' ],
56
- [ 'AFFY HG U95E', 'affy_hg_u95e' ],
57
- [ 'AFFY HG U95A', 'affy_hg_u95a' ],
58
- [ 'AFFY HUGENEFL', 'affy_hugenefl' ],
59
- [ 'AFFY HuEx', 'affy_huex_1_0_st_v2' ],
60
- [ 'AFFY HuGene', 'affy_hugene_1_0_st_v1' ],
61
- [ 'AFFY U133 X3P', 'affy_u133_x3p' ],
62
- [ 'Agilent WholeGenome',"agilent_wholegenome" ],
63
- [ 'Agilent CGH 44b', 'agilent_cgh_44b' ],
64
- [ 'Codelink ID', 'codelink' ],
65
- [ 'Illumina HumanWG 6 v2', 'illumina_humanwg_6_v2' ],
66
- [ 'Illumina HumanWG 6 v3', 'illumina_humanwg_6_v3' ],
67
-
68
- ],
69
- :filter => [],
70
- }
71
- }
72
-
73
- $go = {
74
- :url => "http://cvsweb.geneontology.org/cgi-bin/cvsweb.cgi/go/gene-associations/gene_association.goa_human.gz?rev=HEAD",
75
- :code => 2,
76
- :go => 4,
77
- :pmid => 5,
78
- }
79
-
80
- $query = '"humans"[MeSH Terms] AND ((("genes"[TIAB] NOT Medline[SB]) OR "genes"[MeSH Terms] OR gene[Text Word]) OR (("proteins"[TIAB] NOT Medline[SB]) OR "proteins"[MeSH Terms] OR protein[Text Word])) AND (hasabstract[text] AND "humans"[MeSH Terms] AND English[lang])'
81
- ##########################
82
-
83
- require 'rbbt/util/index'
84
-
85
- Rake::Task['gene.go'].clear
86
- file 'gene.go' => ['identifiers'] do
87
- if File.exists? 'identifiers'
88
- require 'rbbt/sources/organism'
89
- index = Organism.id_index('human', :other => ['Associated Gene Name'])
90
- data = Open.to_hash($go[:url], :native => $go[:code], :extra => $go[:go], :exclude => $go[:exclude])
91
-
92
- data = data.collect{|code, value_lists|
93
- code = index[code]
94
- [code, value_lists.flatten.select{|ref| ref =~ /GO:\d+/}.collect{|ref| ref.match(/(GO:\d+)/)[1]}]
95
- }.select{|p| p[0] && p[1].any?}
96
-
97
- Open.write('gene.go',
98
- data.collect{|p|
99
- p[1].uniq.collect{|go|
100
- "#{p[0]}\t#{go}"
101
- }.join("\n")
102
- }.join("\n")
103
- )
104
- end
105
- end
106
-
107
- Rake::Task['gene_go.pmid'].clear
108
- file 'gene_go.pmid' => ['identifiers'] do
109
- if File.exists? 'identifiers'
110
- index = Index.index('identifiers')
111
- data = Open.to_hash($go[:url], :native => $go[:code], :extra => $go[:pmid], :exclude => $go[:exclude])
112
-
113
- data = data.collect{|code, value_lists|
114
- code = index[code]
115
- [code, value_lists.flatten.select{|ref| ref =~ /PMID:\d+/}.collect{|ref| ref.match(/PMID:(\d+)/)[1]}]
116
- }.select{|p| p[0] && p[1].any?}
117
-
118
- Open.write('gene_go.pmid',
119
- data.collect{|p|
120
- p[1].uniq.collect{|pmid|
121
- "#{p[0]}\t#{pmid}"
122
- }.join("\n")
123
- }.join("\n")
124
- )
125
- end
126
- end
127
-
128
-
129
- Rake::Task['lexicon'].clear
130
- file 'lexicon' => ['identifiers'] do
131
- if File.exists? 'identifiers'
132
- require 'rbbt/sources/organism'
133
- HGNC_URL = 'http://www.genenames.org/cgi-bin/hgnc_downloads.cgi?title=HGNC+output+data&hgnc_dbtag=on&col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_prev_name&col=gd_aliases&col=gd_name_aliases&col=gd_pub_acc_ids&status=Approved&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag'
134
- names = Open.to_hash(HGNC_URL, :exclude => proc{|l| l.match(/^HGNC ID/)}, :flatten => true)
135
- translations = Organism.id_index('human', :native => 'Entrez Gene ID', :other => ['HGNC ID'])
136
-
137
- Open.write('lexicon',
138
- names.collect{|code, names|
139
- next unless translations[code]
140
- ([translations[code]] + names).join("\t")
141
- }.compact.join("\n")
142
- )
143
- end
144
-
145
- end
@@ -1,77 +0,0 @@
1
- require __FILE__.sub(/[^\/]*$/,'') + '../rake-include'
2
-
3
- $name = "Mus musculus"
4
-
5
-
6
- $native_id = "MGI DB ID"
7
-
8
- $entrez2native = {
9
- :tax => 10090,
10
- :fix => nil,
11
- :check => proc{|code| code.match(/^MGI/)},
12
- }
13
-
14
- $lexicon = {
15
- :file => {
16
- :url => "ftp://ftp.informatics.jax.org/pub/reports/MGI_Coordinate.rpt",
17
- :native => 0,
18
- :extra => [2,3],
19
- :exclude => proc{|l| l.split(/\t/)[1] != "Gene"},
20
- },
21
- }
22
-
23
- $identifiers = {
24
- :file => {
25
- :url => "ftp://ftp.informatics.jax.org/pub/reports/MGI_Coordinate.rpt",
26
- :native => 0,
27
- :extra => [],
28
- :exclude => proc{|l| l.split(/\t/)[1] != "Gene"},
29
- },
30
- :biomart => {
31
- :database => 'mmusculus_gene_ensembl',
32
- :main => ['MGI DB ID', 'mgi_id'] ,
33
- :extra => [
34
- ['Associated Gene Name' , "external_gene_id"],
35
- ['Protein ID' , "protein_id"] ,
36
- ['UniProt/SwissProt ID' , "uniprot_swissprot"] ,
37
- ['Unigene ID' , "unigene"] ,
38
- ['UniProt/SwissProt Accession' , "uniprot_swissprot_accession"] ,
39
- ['RefSeq Protein ID' , "refseq_peptide"] ,
40
- ['EMBL (Genbank) ID' , "embl"] ,
41
-
42
- ['Affy mg u74a',"affy_mg_u74a" ],
43
- ['Affy mg u74av2',"affy_mg_u74av2" ],
44
- ['Affy mg u74b',"affy_mg_u74b" ],
45
- ['Affy mg u74bv2',"affy_mg_u74bv2" ],
46
- ['Affy mg u74c',"affy_mg_u74c" ],
47
- ['Affy mg u74cv2',"affy_mg_u74cv2" ],
48
- ['Affy moe430a',"affy_moe430a" ],
49
- ['Affy moe430b',"affy_moe430b" ],
50
- ['AFFY MoEx',"affy_moex_1_0_st_v1" ],
51
- ['AFFY MoGene',"affy_mogene_1_0_st_v1" ],
52
- ['Affy mouse430 2',"affy_mouse430_2" ],
53
- ['Affy mouse430a 2',"affy_mouse430a_2" ],
54
- ['Affy mu11ksuba',"affy_mu11ksuba" ],
55
- ['Affy mu11ksubb',"affy_mu11ksubb" ],
56
- ['Agilent WholeGenome',"agilent_wholegenome" ],
57
- ['Codelink ID',"codelink" ],
58
- ['Illumina MouseWG 6 v1',"illumina_mousewg_6_v1" ],
59
- ['Illumina MouseWG 6 v2',"illumina_mousewg_6_v2" ],
60
-
61
- ],
62
- :filter => ['with_mgi'], # This is needed as the filter is not with_mgi_id as was expected
63
- }
64
- }
65
-
66
- $go = {
67
- :url => "ftp://ftp.geneontology.org/go/gene-associations/gene_association.mgi.gz",
68
- :code => 1,
69
- :go => 4,
70
- :pmid => 5,
71
- }
72
-
73
- $query = '(("mice"[TIAB] NOT Medline[SB]) OR "mice"[MeSH Terms] OR mouse[Text Word]) AND ((("genes"[TIAB] NOT Medline[SB]) OR "genes"[MeSH Terms] OR gene[Text Word]) OR (("proteins"[TIAB] NOT Medline[SB]) OR "proteins"[MeSH Terms] OR protein[Text Word]))'
74
- ##########################
75
-
76
-
77
-
@@ -1,40 +0,0 @@
1
- require __FILE__.sub(/[^\/]*$/,'') + '../rake-include'
2
-
3
- $name = "Schizosaccharomyces pombe"
4
-
5
-
6
- $native_id = "GeneDB Id"
7
-
8
- $entrez2native = {
9
- :tax => 4896,
10
- :fix => proc{|code| code.sub(/GeneDB:SP/,'SP') },
11
- :check => proc{|code| code.match(/^SP/)},
12
- }
13
-
14
- $lexicon = {
15
- :file => {
16
- :url => 'ftp://ftp.sanger.ac.uk/pub/yeast/pombe/Mappings/allNames.txt',
17
- :native => 0,
18
- :extra => [1,2,3,4,5,6,7,8]
19
- },
20
- }
21
-
22
- $identifiers = {
23
- :file => {
24
- :url => 'ftp://ftp.sanger.ac.uk/pub/yeast/pombe/Mappings/allNames.txt',
25
- :native => 0,
26
- :extra => [],
27
- },
28
- }
29
-
30
- $go = {
31
- :url => "ftp://ftp.sanger.ac.uk/pub/yeast/pombe/Gene_ontology/gene_association.GeneDB_Spombe",
32
- :code => 1,
33
- :go => 4,
34
- :pmid => 5,
35
- }
36
-
37
- $query = 'pombe[All Fields] AND (hasabstract[text] AND English[lang])'
38
- ####
39
-
40
-
@@ -1,258 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
3
- require 'rbbt/util/arrayHash'
4
- require 'rbbt/sources/biomart'
5
- require 'rbbt/sources/entrez'
6
- require 'rbbt/sources/pubmed'
7
-
8
-
9
-
10
- file 'name' do
11
- Open.write('name', $name)
12
- end
13
-
14
- file 'all.pmid' do
15
- Open.write('all.pmid', PubMed.query($query).join("\n"))
16
- end
17
-
18
- file 'lexicon' do
19
- begin
20
-
21
- data = nil
22
- # Read from file
23
- if $lexicon[:file]
24
- file = Open.to_hash($lexicon[:file][:url], $lexicon[:file])
25
- data = ArrayHash.new(file, $native_id)
26
- end
27
-
28
- # Translate from entrez to native if needed
29
- if $entrez2native
30
- translations = {}
31
- Entrez.entrez2native(*$entrez2native.values_at(:tax,:native,:fix,:check)).
32
- each{|k,v|
33
- translations[k] = [v.join("|")]
34
- }
35
- translations_data = ArrayHash.new(translations,'Entrez Gene ID', [$native_id])
36
- if data
37
- data.merge(translations_data)
38
- else
39
- data = translations_data
40
- end
41
-
42
- end
43
-
44
-
45
- # Read from Biomart and merge with previous data
46
- if $lexicon[:biomart]
47
- biomart = {}
48
-
49
- BioMart.query(
50
- $lexicon[:biomart][:database],
51
- $lexicon[:biomart][:main][1],
52
- $lexicon[:biomart][:extra].collect{|v| v[1]},
53
- $lexicon[:biomart][:filter]
54
- ).each{|key, values_list|
55
- values = values_list.values_at(*$lexicon[:biomart][:extra].collect{|v| v[1]}).compact.collect{|list| list.select{|e| e.to_s != ""}.uniq.join("|")}
56
- biomart[key] = values
57
- }
58
-
59
- biomart_data = ArrayHash.new(biomart, $lexicon[:biomart][:main][0], $lexicon[:biomart][:extra].collect{|v| v[0]})
60
-
61
- if data
62
- if $lexicon[:biomart][:extra].collect{|v| v[1]}.include?( $native_id )|| $lexicon[:biomart][:main][0] == $native_id
63
- field = $native_id
64
- else
65
- field = 'Entrez Gene ID'
66
- end
67
- data.merge(biomart_data, field)
68
- else
69
- data = biomart_data
70
- end
71
- end
72
-
73
- if $entrez2native
74
- gene_alias = {}
75
- Entrez.entrez2native($entrez2native[:tax],4).
76
- each{|k,v|
77
- gene_alias[k] = [v.select{|e| e.to_s != ""}.join("|")]
78
- }
79
- if gene_alias.keys.any?
80
- gene_alias_data = ArrayHash.new(gene_alias,'Entrez Gene ID', ['Entrez Gene Alias'])
81
- data.merge(gene_alias_data, 'Entrez Gene ID')
82
- end
83
- end
84
-
85
- data.remove('Entrez Gene ID')
86
- data.clean
87
- Open.write('lexicon', data.data.collect{|code, name_lists|
88
- "#{ code }\t" + name_lists.flatten.select{|n| n.to_s != ""}.uniq.join("\t")
89
- }.join("\n"))
90
-
91
- rescue Entrez::NoFileError
92
- puts "Lexicon not produced for #{$name}, install the entrez gene_info file (rbbt_config install entrez)."
93
- end
94
- end
95
-
96
-
97
- file 'identifiers' do
98
-
99
- begin
100
- data = nil
101
- if $identifiers[:file]
102
- file = Open.to_hash($identifiers[:file][:url], $identifiers[:file])
103
- data = ArrayHash.new(file, $native_id, $identifiers[:file][:fields])
104
- end
105
-
106
- # Translate from entrez to native if needed
107
- if $entrez2native
108
- translations = {}
109
- Entrez.entrez2native(*$entrez2native.values_at(:tax,:native,:fix,:check)).
110
- each{|k,v|
111
- translations[k] = [v.join("|")]
112
- }
113
- if translations.keys.any?
114
- translations_data = ArrayHash.new(translations,'Entrez Gene ID', [$native_id])
115
- if data
116
- data.merge(translations_data)
117
- else
118
- data = translations_data
119
- end
120
- end
121
-
122
- end
123
-
124
-
125
- # Read from Biomart and merge with previous data
126
- if $identifiers[:biomart]
127
- biomart = {}
128
-
129
- BioMart.query(
130
- $identifiers[:biomart][:database],
131
- $identifiers[:biomart][:main][1],
132
- $identifiers[:biomart][:extra].collect{|v| v[1]},
133
- $identifiers[:biomart][:filter]
134
- ).each{|key, values_list|
135
- values = values_list.values_at(*$identifiers[:biomart][:extra].collect{|v| v[1]}).compact.collect{|list| list.select{|e| e.to_s != ""}.uniq.join("|")}
136
- biomart[key] = values
137
- }
138
-
139
- biomart_data = ArrayHash.new(biomart, $identifiers[:biomart][:main][0], $identifiers[:biomart][:extra].collect{|v| v[0]})
140
- $identifiers[:biomart][:extra].each{|values|
141
- if values[2]
142
- biomart_data.process(values[0]){|n| "#{values[2]}:#{n}"}
143
- end
144
- }
145
-
146
-
147
- if data
148
- if $identifiers[:biomart][:extra].collect{|v| v[1]}.include?( $native_id ) || $identifiers[:biomart][:main][0] == $native_id
149
- field = $native_id
150
- else
151
- field = 'Entrez Gene ID'
152
- end
153
- data.merge(biomart_data, field)
154
- else
155
- data = biomart_data
156
- end
157
- end
158
-
159
-
160
- # Add the alias at the end
161
- if $entrez2native
162
- gene_alias = {}
163
- Entrez.entrez2native($entrez2native[:tax],4).
164
- each{|k,v|
165
- gene_alias[k] = [v.join("|")]
166
- }
167
- if gene_alias.keys.any?
168
- gene_alias_data = ArrayHash.new(gene_alias,'Entrez Gene ID', ['Entrez Gene Alias'])
169
- if data
170
- data.merge(gene_alias_data, 'Entrez Gene ID')
171
- else
172
- data = gene_alias_data
173
- end
174
- end
175
- end
176
-
177
-
178
-
179
- # Write ids to file
180
- fout = File.open('identifiers', 'w')
181
- fout.puts "##{$native_id}\t" + data.fields.join("\t")
182
- data.clean
183
- data.data.each{|code, values|
184
- fout.puts code + "\t" + values.join("\t")
185
- }
186
- fout.close
187
-
188
- rescue Entrez::NoFileError
189
- puts "Identifiers not produced for #{$name}, install the entrez gene_info file (rbbt_config install entrez)."
190
- end
191
- end
192
-
193
-
194
- file 'gene.go' do
195
- data = Open.to_hash($go[:url], :native => $go[:code], :extra => $go[:go], :exclude => $go[:exclude], :fix => $go[:fix])
196
-
197
- data = data.collect{|code, value_lists|
198
- [code, value_lists.flatten.select{|ref| ref =~ /GO:\d+/}.collect{|ref| ref.match(/(GO:\d+)/)[1]}]
199
- }.select{|p| p[1].any?}
200
-
201
- Open.write('gene.go',
202
- data.collect{|p|
203
- p[1].uniq.collect{|go|
204
- "#{p[0]}\t#{go}"
205
- }.join("\n")
206
- }.join("\n")
207
- )
208
- end
209
-
210
- file 'gene_go.pmid' do
211
- data = Open.to_hash($go[:url], :native => $go[:code], :extra => $go[:pmid], :exclude => $go[:exclude], :fix => $go[:fix])
212
-
213
- data = data.collect{|code, value_lists|
214
- [code, value_lists.flatten.select{|ref| ref =~ /PMID:\d+/}.collect{|ref| ref.match(/PMID:(\d+)/)[1]}]
215
- }.select{|p| p[1].any?}
216
-
217
- Open.write('gene_go.pmid',
218
- data.collect{|p|
219
- p[1].uniq.collect{|pmid| "#{p[0]}\t#{pmid}" }.join("\n")
220
- }.join("\n")
221
- )
222
- end
223
-
224
-
225
- file 'gene.pmid' do
226
- begin
227
- translations = Entrez.entrez2native(*$entrez2native.values_at(:tax,:native,:fix,:check)) if $native_id != "Entrez Gene ID"
228
-
229
- data = Entrez.entrez2pubmed($entrez2native[:tax])
230
-
231
- Open.write('gene.pmid',
232
- data.collect{|code,pmids|
233
- next if translations && ! translations[code]
234
- code = translations[code].first if translations
235
- pmids.collect{|pmid|
236
- "#{ code }\t#{pmid}"
237
- }.compact.join("\n")
238
- }.compact.join("\n")
239
- )
240
- rescue Entrez::NoFileError
241
- puts "Gene article associations from entrez not produced, install the gene2pumbed file (rbbt_config install entrez)."
242
- end
243
-
244
- end
245
-
246
-
247
-
248
-
249
- task 'all' => ['name', 'lexicon', 'identifiers', 'gene_go.pmid', 'gene.pmid', 'gene.go', 'all.pmid']
250
- task 'clean' do
251
- `rm -f 'name' 'lexicon' 'identifiers' 'gene_go.pmid' 'gene.pmid' 'gene.go' 'all.pmid'`
252
- end
253
-
254
- task 'update' do
255
- Rake::Task['clean'].invoke if $force
256
- Rake::Task['all'].invoke
257
- end
258
-