bio 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,23 +1,10 @@
1
1
  #
2
2
  # test/unit/bio/db/embl/test_embl.rb - Unit test for Bio::EMBL
3
3
  #
4
- # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
4
+ # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ # License:: The Ruby License
5
6
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
10
- #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: test_embl.rb,v 1.3 2005/11/23 10:02:42 nakao Exp $
7
+ # $Id: test_embl.rb,v 1.5 2007/04/05 23:35:43 trevor Exp $
21
8
  #
22
9
 
23
10
  require 'pathname'
@@ -0,0 +1,219 @@
1
+ #
2
+ # test/unit/bio/db/embl/test_embl_rel89.rb - Unit test for Bio::EMBL
3
+ #
4
+ # Copyright:: Copyright (C) 2007 Mitsuteru Nakao <n@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id: test_embl_rel89.rb,v 1.2 2007/04/05 23:35:43 trevor Exp $
8
+ #
9
+
10
+ require 'pathname'
11
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
12
+ $:.unshift(libpath) unless $:.include?(libpath)
13
+
14
+
15
+ require 'test/unit'
16
+ require 'bio/db/embl/embl'
17
+
18
+ module Bio
19
+ class TestEMBL < Test::Unit::TestCase
20
+
21
+ def setup
22
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
23
+ output = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
24
+ @obj = Bio::EMBL.new(output)
25
+ end
26
+
27
+ # http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#s_3_4_1
28
+ def test_id_line
29
+ assert(@obj.id_line)
30
+ end
31
+
32
+ def test_id_line_iterator
33
+ assert(@obj.id_line {|key, value| })
34
+ end
35
+
36
+ def test_id_line_entry_name
37
+ assert_equal('AB090716', @obj.id_line('ENTRY_NAME'))
38
+ end
39
+
40
+ def test_id_line_data_class
41
+ assert_equal('STD', @obj.id_line('DATA_CLASS'))
42
+ end
43
+
44
+ def test_id_line_sequence_version
45
+ assert_equal('1', @obj.id_line('SEQUENCE_VERSION'))
46
+ end
47
+
48
+ def test_id_line_molecule_type
49
+ assert_equal('genomic DNA', @obj.id_line('MOLECULE_TYPE'))
50
+ end
51
+
52
+ def test_id_line_division
53
+ assert_equal('VRT', @obj.id_line('DIVISION'))
54
+ end
55
+
56
+ def test_id_line_sequence_length
57
+ assert_equal(166, @obj.id_line('SEQUENCE_LENGTH'))
58
+ end
59
+
60
+ def test_entry
61
+ entry_id = 'AB090716'
62
+ assert_equal(entry_id, @obj.entry)
63
+ assert_equal(entry_id, @obj.entry_name)
64
+ assert_equal(entry_id, @obj.entry_id)
65
+ end
66
+
67
+ def test_molecule
68
+ molecule = 'genomic DNA'
69
+ assert_equal(molecule, @obj.molecule)
70
+ assert_equal(molecule, @obj.molecule_type)
71
+ end
72
+
73
+ def test_division
74
+ assert_equal('VRT', @obj.division)
75
+ end
76
+
77
+ def test_sequence_length
78
+ seqlen = 166
79
+ assert_equal(seqlen, @obj.sequence_length)
80
+ assert_equal(seqlen, @obj.seqlen)
81
+ end
82
+
83
+ # Bio::EMBLDB::COMMON#ac
84
+ def test_ac
85
+ ac = ['AB090716']
86
+ assert_equal(ac, @obj.ac)
87
+ assert_equal(ac, @obj.accessions)
88
+ end
89
+
90
+ # Bio::EMBLDB::COMMON#accession
91
+ def test_accession
92
+ assert_equal('AB090716', @obj.accession)
93
+ end
94
+
95
+ def test_sv
96
+ assert_equal('AB090716.1', @obj.sv)
97
+ end
98
+
99
+ def test_version
100
+ assert_equal(1, @obj.version)
101
+ end
102
+
103
+ def test_dt
104
+ assert(@obj.dt)
105
+ end
106
+
107
+ def test_dt_iterator
108
+ assert(@obj.dt {|key, value| })
109
+ end
110
+
111
+ def test_dt_created
112
+ assert_equal('25-OCT-2002 (Rel. 73, Created)', @obj.dt('created'))
113
+ end
114
+
115
+ def test_dt_updated
116
+ assert_equal('14-NOV-2006 (Rel. 89, Last updated, Version 3)', @obj.dt('updated'))
117
+ end
118
+
119
+ # Bio::EMBLDB::COMMON#de
120
+ def test_de
121
+ assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @obj.de)
122
+ end
123
+
124
+ # Bio::EMBLDB::COMMON#kw
125
+ def test_kw
126
+ k = []
127
+ assert_equal([], @obj.kw)
128
+ assert_equal([], @obj.keywords)
129
+ end
130
+
131
+ def test_os
132
+ # assert_equal('', @obj.os)
133
+ assert_raises(RuntimeError) { @obj.os }
134
+ end
135
+
136
+ def test_os_valid
137
+ @obj.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
138
+ assert_equal("Haplochromis sp. 'muzu rukwa'", @obj.os)
139
+ end
140
+
141
+ # Bio::EMBLDB::COMMON#oc
142
+ def test_oc
143
+ assert_equal('Eukaryota', @obj.oc.first)
144
+ end
145
+
146
+ # Bio::EMBLDB::COMMON#og
147
+ def test_og
148
+ assert_equal([], @obj.og)
149
+ end
150
+
151
+ # Bio::EMBLDB::COMMON#ref
152
+ def test_ref
153
+ assert_equal(2, @obj.ref.size)
154
+ end
155
+
156
+ # Bio::EMBLDB::COMMON#references
157
+ def test_references
158
+ assert_equal(Bio::References, @obj.references.class)
159
+ end
160
+
161
+ # Bio::EMBLDB::COMMON#dr
162
+ def test_dr
163
+ assert_equal({}, @obj.dr)
164
+ end
165
+
166
+ def test_fh
167
+ assert_equal('Key Location/Qualifiers', @obj.fh)
168
+ end
169
+
170
+ def test_ft
171
+ assert_equal(Bio::Features, @obj.ft.class)
172
+ end
173
+
174
+ def test_ft_iterator
175
+ @obj.ft.each do |feature|
176
+ assert_equal(Bio::Feature, feature.class)
177
+ end
178
+ end
179
+
180
+ def test_ft_accessor
181
+ assert_equal('CDS', @obj.ft.features[1].feature)
182
+ end
183
+
184
+ def test_each_cds
185
+ @obj.each_cds do |x|
186
+ assert_equal('CDS', x.feature)
187
+ end
188
+ end
189
+
190
+ def test_each_gene
191
+ @obj.each_gene do |x|
192
+ assert_equal('gene', x.feature)
193
+ end
194
+ end
195
+
196
+ def test_cc
197
+ assert_equal('', @obj.cc)
198
+ end
199
+
200
+ # def test_xx
201
+ # end
202
+
203
+ def test_sq
204
+ data = {"a"=>29, "c"=>42, "ntlen"=>166, "g"=>41, "t"=>54, "other"=>0}
205
+ assert_equal(data, @obj.sq)
206
+ end
207
+
208
+ def test_sq_get
209
+ assert_equal(29, @obj.sq("a"))
210
+ end
211
+
212
+ def test_seq
213
+ seq = 'gttctggcctcatggactgaagacttcctgtggacctgatgtgttcagtggaagtgaagaccctggagtacagtcctacatgattgttctcatgattacttgctgtttcatccccctggctatcatcatcctgtgctaccttgctgtgtggatggccatccgtgct'
214
+ assert_equal(seq, @obj.seq)
215
+ assert_equal(seq, @obj.naseq)
216
+ assert_equal(seq, @obj.ntseq)
217
+ end
218
+ end
219
+ end
@@ -1,27 +1,15 @@
1
1
  #
2
2
  # test/unit/bio/db/embl/test_sptr.rb - Unit test for Bio::SPTR
3
3
  #
4
- # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
4
+ # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ # License:: The Ruby License
5
6
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
10
- #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: test_sptr.rb,v 1.2 2005/11/23 10:06:12 nakao Exp $
7
+ # $Id: test_sptr.rb,v 1.7 2007/04/05 23:35:43 trevor Exp $
21
8
  #
22
9
 
23
10
  require 'pathname'
24
- libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
11
+ libpath = Pathname.new(File.join(File.dirname(__FILE__),
12
+ ['..'] * 5, 'lib')).cleanpath.to_s
25
13
  $:.unshift(libpath) unless $:.include?(libpath)
26
14
 
27
15
  require 'test/unit'
@@ -31,8 +19,11 @@ module Bio
31
19
  class TestSPTR < Test::Unit::TestCase
32
20
 
33
21
  def setup
34
- bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
35
- data = File.open(File.join(bioruby_root, 'test', 'data', 'uniprot', 'p53_human.uniprot')).read
22
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__),
23
+ ['..'] * 5)).cleanpath.to_s
24
+ data = File.open(File.join(bioruby_root,
25
+ 'test', 'data', 'uniprot',
26
+ 'p53_human.uniprot')).read
36
27
  @obj = Bio::SPTR.new(data)
37
28
  end
38
29
 
@@ -75,7 +66,10 @@ module Bio
75
66
  end
76
67
 
77
68
  def test_ac
78
- acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807", "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q86UG1", "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2", "Q9NZD0", "Q9UBI2", "Q9UQ61"]
69
+ acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807",
70
+ "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q86UG1",
71
+ "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2",
72
+ "Q9NZD0", "Q9UBI2", "Q9UQ61"]
79
73
  assert_equal(acs, @obj.ac)
80
74
  assert_equal(acs, @obj.accessions)
81
75
  end
@@ -93,11 +87,13 @@ module Bio
93
87
  end
94
88
 
95
89
  def test_dt_sequence
96
- assert_equal('01-MAR-1989 (Rel. 10, Last sequence update)', @obj.dt('sequence'))
90
+ assert_equal('01-MAR-1989 (Rel. 10, Last sequence update)',
91
+ @obj.dt('sequence'))
97
92
  end
98
93
 
99
94
  def test_dt_annotation
100
- assert_equal('13-SEP-2005 (Rel. 48, Last annotation update)', @obj.dt('annotation'))
95
+ assert_equal('13-SEP-2005 (Rel. 48, Last annotation update)',
96
+ @obj.dt('annotation'))
101
97
  end
102
98
 
103
99
  def test_de
@@ -109,21 +105,25 @@ module Bio
109
105
  end
110
106
 
111
107
  def test_synonyms
112
- assert_equal(["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"], @obj.synonyms)
108
+ ary = ["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"]
109
+ assert_equal(ary, @obj.synonyms)
113
110
  end
114
111
 
115
112
  def test_gn
116
- assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}], @obj.gn)
113
+ assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}],
114
+ @obj.gn)
117
115
  end
118
116
 
119
117
  def test_gn_uniprot_parser
120
118
  gn_uniprot_data = ''
121
- assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}], @obj.instance_eval("gn_uniprot_parser"))
119
+ assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}],
120
+ @obj.instance_eval("gn_uniprot_parser"))
122
121
  end
123
122
 
124
123
  def test_gn_old_parser
125
124
  gn_old_data = ''
126
- assert_equal([["Name=TP53; Synonyms=P53;"]], @obj.instance_eval("gn_old_parser"))
125
+ assert_equal([["Name=TP53; Synonyms=P53;"]],
126
+ @obj.instance_eval("gn_old_parser"))
127
127
  end
128
128
 
129
129
  def test_gene_names
@@ -189,15 +189,19 @@ module Bio
189
189
  end
190
190
 
191
191
  def test_oc
192
- assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria", "Euarchontoglires", "Primates", "Catarrhini", "Hominidae", "Homo"], @obj.oc)
192
+ assert_equal(["Eukaryota", "Metazoa", "Chordata", "Craniata",
193
+ "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria",
194
+ "Euarchontoglires", "Primates", "Catarrhini", "Hominidae",
195
+ "Homo"],
196
+ @obj.oc)
193
197
  end
194
198
 
195
199
  def test_ox
196
200
  assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox)
197
201
  end
198
202
 
199
- def test_ref # Bio::EMBL::COMMON#ref
200
- @obj.ref
203
+ def test_ref # Bio::SPTR#ref
204
+ assert_equal(Array, @obj.ref.class)
201
205
  end
202
206
 
203
207
  def test_cc
@@ -205,17 +209,32 @@ module Bio
205
209
  end
206
210
 
207
211
  def test_cc_database
208
- db = [{"NAME"=>"IARC TP53 mutation database", "WWW"=>"http://www.iarc.fr/p53/", "FTP"=>nil, "NOTE"=>"IARC db of somatic p53 mutations"},
209
- {"NAME"=>"Tokyo p53", "WWW"=>"http://p53.genome.ad.jp/", "FTP"=>nil, "NOTE"=>"University of Tokyo db of p53 mutations"},
210
- {"NAME"=>"p53 web site at the Institut Curie", "WWW"=>"http://p53.curie.fr/", "FTP"=>nil, "NOTE"=>nil},
211
- {"NAME"=>"Atlas Genet. Cytogenet. Oncol. Haematol.", "WWW"=>"http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html", "FTP"=>nil, "NOTE"=>nil}]
212
+ db = [{"NAME" => "IARC TP53 mutation database",
213
+ "WWW" => "http://www.iarc.fr/p53/",
214
+ "FTP" => nil, "NOTE" => "IARC db of somatic p53 mutations"},
215
+ {"NAME" => "Tokyo p53",
216
+ "WWW" => "http://p53.genome.ad.jp/", "FTP" => nil,
217
+ "NOTE" => "University of Tokyo db of p53 mutations"},
218
+ {"NAME" => "p53 web site at the Institut Curie",
219
+ "WWW" => "http://p53.curie.fr/", "FTP" => nil, "NOTE" => nil},
220
+ {"NAME" => "Atlas Genet. Cytogenet. Oncol. Haematol.",
221
+ "WWW" => "http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html",
222
+ "FTP" => nil, "NOTE" => nil}]
212
223
  assert_equal(db, @obj.cc('DATABASE'))
213
224
  end
214
225
 
215
226
  def test_cc_alternative_products
216
- ap = {"Comment"=>nil, "Named isoforms"=>"2", "Variants"=> [{"IsoId"=>"P04637-1", "Name"=>"1", "Sequence"=>["Displayed"]},
217
- {"IsoId"=>"P04637-2", "Name"=>"2", "Synonyms"=>"I9RET", "Sequence"=>["VSP_006535", "VSP_006536"]}],
218
- "Event"=>"Alternative splicing"}
227
+ ap = {"Comment" => "",
228
+ "Named isoforms" => "2",
229
+ "Variants" => [{"IsoId" => ["P04637-1"],
230
+ "Name" => "1",
231
+ "Synonyms" => [],
232
+ "Sequence" => ["Displayed"]},
233
+ {"IsoId" => ["P04637-2"],
234
+ "Name" => "2",
235
+ "Synonyms" => ["I9RET"],
236
+ "Sequence" => ["VSP_006535", "VSP_006536"]}],
237
+ "Event" => ["Alternative splicing"]}
219
238
  assert_equal(ap, @obj.cc('ALTERNATIVE PRODUCTS'))
220
239
  end
221
240
 
@@ -224,20 +243,28 @@ module Bio
224
243
  end
225
244
 
226
245
 
227
-
228
246
  def test_kw
229
- keywords = ["3D-structure", "Acetylation", "Activator", "Alternative splicing", "Anti-oncogene", "Apoptosis", "Cell cycle", "Disease mutation", "DNA-binding", "Glycoprotein", "Li-Fraumeni syndrome", "Metal-binding", "Nuclear protein", "Phosphorylation", "Polymorphism", "Transcription", "Transcription regulation", "Zinc"]
247
+ keywords = ["3D-structure", "Acetylation", "Activator",
248
+ "Alternative splicing", "Anti-oncogene",
249
+ "Apoptosis", "Cell cycle", "Disease mutation", "DNA-binding",
250
+ "Glycoprotein", "Li-Fraumeni syndrome", "Metal-binding",
251
+ "Nuclear protein", "Phosphorylation", "Polymorphism",
252
+ "Transcription", "Transcription regulation", "Zinc"]
230
253
  assert_equal(keywords, @obj.kw)
231
254
  end
232
255
 
233
256
  def test_ft
234
257
  assert(@obj.ft)
235
258
  name = 'DNA_BIND'
236
- assert_equal([{"FTId"=>nil, "From"=>102, "diff"=>[], "To"=>292, "Description"=>nil}], @obj.ft(name))
259
+ assert_equal([{"FTId"=>"", "From"=>102, "diff"=>[], "To"=>292,
260
+ "Description"=>"",
261
+ "original" => ['DNA_BIND', '102', '292', '', '']}],
262
+ @obj.ft[name])
237
263
  end
238
264
 
239
265
  def test_sq
240
- assert_equal({"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653}, @obj.sq)
266
+ assert_equal({"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653},
267
+ @obj.sq)
241
268
  end
242
269
 
243
270
  def test_sq_crc64
@@ -264,5 +291,1485 @@ module Bio
264
291
  assert_equal(seq, @obj.aaseq)
265
292
  end
266
293
 
294
+ end # class TestSPTR
295
+
296
+
297
+
298
+ class TestSPTRCC < Test::Unit::TestCase
299
+ def test_allergen
300
+ # ALLERGEN Information relevant to allergenic proteins
301
+ data = 'CC -!- ALLERGEN: Causes an allergic reaction in human.'
302
+ sp = Bio::SPTR.new(data)
303
+ assert_equal(['Causes an allergic reaction in human.'],
304
+ sp.cc['ALLERGEN'])
305
+ assert_equal(['Causes an allergic reaction in human.'],
306
+ sp.cc('ALLERGEN'))
307
+ end
308
+
309
+ def test_alternative_products_access_as_hash
310
+ data = "CC -!- ALTERNATIVE PRODUCTS:
311
+ CC Event=Alternative initiation; Named isoforms=2;
312
+ CC Name=Long;
313
+ CC IsoId=P68250-1; Sequence=Displayed;
314
+ CC Name=Short;
315
+ CC IsoId=P68250-2; Sequence=VSP_018631;
316
+ CC Note=Contains a N-acetylmethionine at position 1 (By
317
+ CC similarity);"
318
+
319
+ res = ["Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"]
320
+ sp = Bio::SPTR.new(data)
321
+ assert_equal(res,
322
+ sp.cc['ALTERNATIVE PRODUCTS'])
323
+ end
324
+
325
+ def test_alternative_products_ai
326
+ # ALTERNATIVE PRODUCTS Description of the existence of related protein sequence(s) produced by alternative splicing of the same gene, alternative promoter usage, ribosomal frameshifting or by the use of alternative initiation codons; see 3.21.15
327
+ # Alternative promoter usage, Alternative splicing, Alternative initiation, Ribosomal frameshifting
328
+ data = "CC -!- ALTERNATIVE PRODUCTS:
329
+ CC Event=Alternative initiation; Named isoforms=2;
330
+ CC Name=Long;
331
+ CC IsoId=P68250-1; Sequence=Displayed;
332
+ CC Name=Short;
333
+ CC IsoId=P68250-2; Sequence=VSP_018631;
334
+ CC Note=Contains a N-acetylmethionine at position 1 (By
335
+ CC similarity);"
336
+
337
+ sp = Bio::SPTR.new(data)
338
+ assert_equal({"Comment"=>"",
339
+ "Named isoforms"=>"2",
340
+ "Variants"=>
341
+ [{"IsoId"=>["P68250-1"],
342
+ "Name"=>"Long",
343
+ "Synonyms" => [],
344
+ "Sequence"=>["Displayed"]},
345
+ {"IsoId"=>["P68250-2"],
346
+ "Name"=>"Short",
347
+ "Synonyms" => [],
348
+ "Sequence"=>["VSP_018631"]}],
349
+ "Event"=>["Alternative initiation"]},
350
+ sp.cc('ALTERNATIVE PRODUCTS'))
351
+ end
352
+ def test_alternative_products_as
353
+ data = "CC -!- ALTERNATIVE PRODUCTS:
354
+ CC Event=Alternative splicing; Named isoforms=2;
355
+ CC Name=1;
356
+ CC IsoId=P04637-1; Sequence=Displayed;
357
+ CC Name=2; Synonyms=I9RET;
358
+ CC IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;
359
+ CC Note=Seems to be non-functional. Expressed in quiescent
360
+ CC lymphocytes;"
361
+ sp = Bio::SPTR.new(data)
362
+ assert_equal({"Comment"=>"",
363
+ "Named isoforms"=>"2",
364
+ "Variants"=>
365
+ [{"Name"=>"1",
366
+ "IsoId"=>["P04637-1"],
367
+ "Synonyms"=>[],
368
+ "Sequence"=>["Displayed"]},
369
+ {"IsoId"=>["P04637-2"],
370
+ "Name"=>"2",
371
+ "Synonyms"=>["I9RET"],
372
+ "Sequence"=>["VSP_006535", "VSP_006536"]}],
373
+ "Event"=>["Alternative splicing"]},
374
+ sp.cc('ALTERNATIVE PRODUCTS'))
375
+ end
376
+ def test_alternative_products_apu
377
+ data = "CC -!- ALTERNATIVE PRODUCTS:
378
+ CC Event=Alternative promoter usage, Alternative splicing; Named isoforms=5;
379
+ CC Comment=Additional isoforms (AAT-1L and AAT-1S) may exist;
380
+ CC Name=1; Synonyms=AAT-1M;
381
+ CC IsoId=Q7Z4T9-1; Sequence=Displayed;
382
+ CC Name=2;
383
+ CC IsoId=Q7Z4T9-2; Sequence=VSP_014910, VSP_014911;
384
+ CC Note=No experimental confirmation available;
385
+ CC Name=3;
386
+ CC IsoId=Q7Z4T9-3; Sequence=VSP_014907, VSP_014912;
387
+ CC Name=4; Synonyms=AAT1-alpha;
388
+ CC IsoId=Q7Z4T9-4; Sequence=VSP_014908;
389
+ CC Note=May be produced by alternative promoter usage;
390
+ CC Name=5; Synonyms=AAT1-beta, AAT1-gamma;
391
+ CC IsoId=Q7Z4T9-5; Sequence=VSP_014909;
392
+ CC Note=May be produced by alternative promoter usage;"
393
+ sp = Bio::SPTR.new(data)
394
+ assert_equal({"Comment"=>"Additional isoforms (AAT-1L and AAT-1S) may exist",
395
+ "Named isoforms"=>"5",
396
+ "Variants"=>
397
+ [{"Name"=>"1",
398
+ "IsoId"=>["Q7Z4T9-1"],
399
+ "Synonyms"=>["AAT-1M"],
400
+ "Sequence"=>["Displayed"]},
401
+ {"Name"=>"2",
402
+ "IsoId"=>["Q7Z4T9-2"],
403
+ "Synonyms" => [],
404
+ "Sequence"=>["VSP_014910", "VSP_014911"]},
405
+ {"Name"=>"3",
406
+ "IsoId"=>["Q7Z4T9-3"],
407
+ "Synonyms" => [],
408
+ "Sequence"=>["VSP_014907", "VSP_014912"]},
409
+ {"Name"=>"4",
410
+ "IsoId"=>["Q7Z4T9-4"],
411
+ "Synonyms"=>["AAT1-alpha"],
412
+ "Sequence"=>["VSP_014908"]},
413
+ {"Name"=>"5",
414
+ "IsoId"=>["Q7Z4T9-5"],
415
+ "Synonyms"=>["AAT1-beta", "AAT1-gamma"],
416
+ "Sequence"=>["VSP_014909"]}],
417
+ "Event"=>["Alternative promoter usage", "Alternative splicing"]},
418
+ sp.cc('ALTERNATIVE PRODUCTS'))
419
+ end
420
+ def test_alternative_products_rf
421
+ data = ""
422
+ sp = Bio::SPTR.new(data)
423
+ assert_equal({},
424
+ sp.cc('ALTERNATIVE PRODUCTS'))
425
+ end
426
+
427
+ def test_biophysicochemical_properties
428
+ # BIOPHYSICOCHEMICAL PROPERTIES Description of the information relevant to biophysical and physicochemical data and information on pH dependence, temperature dependence, kinetic parameters, redox potentials, and maximal absorption; see 3.21.8
429
+ #
430
+ data = 'CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
431
+ CC Kinetic parameters:
432
+ CC KM=45 uM for AdoMet;
433
+ CC Vmax=32 uM/h/mg enzyme;
434
+ CC pH dependence:
435
+ CC Optimum pH is 8.2;'
436
+ sp = Bio::SPTR.new(data)
437
+ assert_equal(["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"],
438
+ sp.cc['BIOPHYSICOCHEMICAL PROPERTIES'])
439
+ assert_equal({"Redox potential" => "",
440
+ "Temperature dependence" => "",
441
+ "Kinetic parameters" => {"KM" => "45 uM for AdoMet",
442
+ "Vmax" => "32 uM/h/mg enzyme"},
443
+ "Absorption" => {},
444
+ "pH dependence" => "Optimum pH is 8.2"},
445
+ sp.cc('BIOPHYSICOCHEMICAL PROPERTIES'))
446
+
447
+ # 3.12.2. Syntax of the topic 'BIOPHYSICOCHEMICAL PROPERTIES'
448
+ data = "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:
449
+ CC Absorption:
450
+ CC Abs(max)=xx nm;
451
+ CC Note=free_text;
452
+ CC Kinetic parameters:
453
+ CC KM=xx unit for substrate [(free_text)];
454
+ CC Vmax=xx unit enzyme [free_text];
455
+ CC Note=free_text;
456
+ CC pH dependence:
457
+ CC free_text;
458
+ CC Redox potential:
459
+ CC free_text;
460
+ CC Temperature dependence:
461
+ CC free_text;"
462
+ sp = Bio::SPTR.new(data)
463
+ assert_equal({"Redox potential"=>"free_text",
464
+ "Temperature dependence"=>"free_text",
465
+ "Kinetic parameters"=>
466
+ {"KM"=>"xx unit for substrate [(free_text)]",
467
+ "Note"=>"free_text",
468
+ "Vmax"=>"xx unit enzyme [free_text]"},
469
+ "Absorption"=>{"Note"=>"free_text", "Abs(max)"=>"xx nm"},
470
+ "pH dependence"=>"free_text"},
471
+ sp.cc('BIOPHYSICOCHEMICAL PROPERTIES'))
472
+ end
473
+
474
+
475
+ def test_biotechnology
476
+ # BIOTECHNOLOGY Description of the use of a specific protein in a biotechnological process
477
+ data = 'CC -!- BIOTECHNOLOGY: Introduced by genetic manipulation and expressed in
478
+ CC improved ripening tomato by Monsanto. ACC is the immediate
479
+ CC precursor of the phytohormone ethylene which is involved in the
480
+ CC control of ripening. ACC deaminase reduces ethylene biosynthesis
481
+ CC and thus extends the shelf life of fruits and vegetables.'
482
+ sp = Bio::SPTR.new(data)
483
+ assert_equal(["Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."],
484
+ sp.cc['BIOTECHNOLOGY'])
485
+ end
486
+
487
+ def test_catalytic_activity
488
+ # CATALYTIC ACTIVITY Description of the reaction(s) catalyzed by an enzyme [1]
489
+ data = 'CC -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3-
490
+ CC methyladenine, 3-methylguanine, 7-methylguanine and 7-
491
+ CC methyladenine.'
492
+ sp = Bio::SPTR.new(data)
493
+ assert_equal(["Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."],
494
+ sp.cc['CATALYTIC ACTIVITY'])
495
+ end
496
+
497
+ def test_caution
498
+ # CAUTION Warning about possible errors and/or grounds for confusion
499
+ data = 'CC -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon
500
+ CC in position 480 which was translated as a stop codon to shorten
501
+ CC the sequence.'
502
+ sp = Bio::SPTR.new(data)
503
+ assert_equal(["Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."],
504
+ sp.cc['CAUTION'])
505
+ assert_equal("Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence.",
506
+ sp.cc('CAUTION'))
507
+
508
+ end
509
+
510
+ def test_cofactor
511
+ # COFACTOR Description of any non-protein substance required by an enzyme for its catalytic activity
512
+ data = 'CC -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity.
513
+ CC -!- COFACTOR: Mg(2+).'
514
+ sp = Bio::SPTR.new(data)
515
+ assert_equal(["Cl(-). Is unique in requiring Cl(-) for its activity.",
516
+ "Mg(2+)."],
517
+ sp.cc['COFACTOR'])
518
+
519
+ assert_equal(["Cl(-). Is unique in requiring Cl(-) for its activity.",
520
+ "Mg(2+)."],
521
+ sp.cc('COFACTOR'))
522
+ end
523
+
524
+ def test_developmental_stage
525
+ # DEVELOPMENTAL STAGE Description of the developmentally-specific expression of mRNA or protein
526
+ data = 'CC -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35
527
+ CC with higher levels detected at day 56. Isoform 1 is not detected
528
+ CC in males of any age.'
529
+ sp = Bio::SPTR.new(data)
530
+ assert_equal(["In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."],
531
+ sp.cc['DEVELOPMENTAL STAGE'])
532
+ assert_equal("In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age.",
533
+ sp.cc('DEVELOPMENTAL STAGE'))
534
+ end
535
+
536
+ def test_disease
537
+ # DISEASE Description of the disease(s) associated with a deficiency of a protein
538
+ data = 'CC -!- DISEASE: Defects in APP are a cause of hereditary cerebral
539
+ CC hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This
540
+ CC disorder is characterized by amyloid deposits in cerebral vessels.
541
+ CC The principal clinical characteristics are recurring cerebral
542
+ CC hemorrhages, sometimes preceded by migrainous headaches or mental
543
+ CC cleavage. Various types of HCHWAD are known. They differ in onset
544
+ CC and aggressiveness of the disease. The Iowa type demonstrated no
545
+ CC cerebral hemorrhaging but is characterized by progressive
546
+ CC cognitive decline. Beta-APP40 is the predominant form of
547
+ CC cerebrovascular amyloid.'
548
+ sp = Bio::SPTR.new(data)
549
+ assert_equal(["Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."],
550
+ sp.cc['DISEASE'])
551
+ assert_equal("Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid.",
552
+ sp.cc('DISEASE'))
553
+ end
554
+
555
+ def test_domain
556
+ # DOMAIN Description of the domain structure of a protein
557
+ data = 'CC -!- DOMAIN: The basolateral sorting signal (BaSS) is required for
558
+ CC sorting of membrane proteins to the basolateral surface of
559
+ CC epithelial cells.
560
+ CC -!- DOMAIN: The NPXY sequence motif found in many tyrosine-
561
+ CC phosphorylated proteins is required for the specific binding of
562
+ CC the PID domain. However, additional amino acids either N- or C-
563
+ CC terminal to the NPXY motif are often required for complete
564
+ CC interaction. The PID domain-containing proteins which bind APP
565
+ CC require the YENPTY motif for full interaction. These interactions
566
+ CC are independent of phosphorylation on the terminal tyrosine
567
+ CC residue. The NPXY site is also involved in clathrin-mediated
568
+ CC endocytosis (By similarity).'
569
+ sp = Bio::SPTR.new(data)
570
+ assert_equal(["The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.",
571
+ "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."],
572
+ sp.cc['DOMAIN'])
573
+ assert_equal(["The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.",
574
+ "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."],
575
+ sp.cc('DOMAIN'))
576
+ end
577
+
578
+ def test_enzyme_regulation
579
+ # ENZYME REGULATION Description of an enzyme regulatory mechanism
580
+ data = 'CC -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated
581
+ CC by the G protein beta and gamma subunit complex.'
582
+ sp = Bio::SPTR.new(data)
583
+ assert_equal(["Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."],
584
+ sp.cc['ENZYME REGULATION'])
585
+ assert_equal("Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex.",
586
+ sp.cc('ENZYME REGULATION'))
587
+ end
588
+
589
+ def test_function
590
+ # FUNCTION General description of the function(s) of a protein
591
+ data = 'CC -!- FUNCTION: May play a fundamental role in situations where fine
592
+ CC interplay between intracellular calcium and cAMP determines the
593
+ CC cellular function. May be a physiologically relevant docking site
594
+ CC for calcineurin (By similarity).'
595
+ sp = Bio::SPTR.new(data)
596
+ assert_equal(["May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."],
597
+ sp.cc['FUNCTION'])
598
+ assert_equal("May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity).",
599
+ sp.cc('FUNCTION'))
600
+ end
601
+
602
+ def test_induction
603
+ # INDUCTION Description of the compound(s) or condition(s) that regulate gene expression
604
+ data = 'CC -!- INDUCTION: By pheromone (alpha-factor).'
605
+ sp = Bio::SPTR.new(data)
606
+ assert_equal(["By pheromone (alpha-factor)."],
607
+ sp.cc['INDUCTION'])
608
+ assert_equal("By pheromone (alpha-factor).",
609
+ sp.cc('INDUCTION'))
610
+ end
611
+
612
+ def test_interaction
613
+ # INTERACTION Conveys information relevant to binary protein-protein interaction 3.21.12
614
+ data = 'CC -!- INTERACTION:
615
+ CC P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435;
616
+ CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
617
+ sp = Bio::SPTR.new(data)
618
+ assert_equal(["P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"],
619
+ sp.cc['INTERACTION'])
620
+ assert_equal([{'SP_Ac' => 'P62158',
621
+ 'identifier' => 'CALM1',
622
+ 'optional_identifier' => '(xeno)',
623
+ 'NbExp' => '1',
624
+ 'IntAct' => ['EBI-457011', 'EBI-397435']},
625
+ {'SP_Ac' => 'P62155',
626
+ 'identifier' => 'calm1',
627
+ 'optional_identifier' => '(xeno)',
628
+ 'NbExp' => '1',
629
+ 'IntAct' => ['EBI-457011', 'EBI-397568']}],
630
+ sp.cc('INTERACTION'))
631
+ end
632
+
633
+ def test_mass_spectrometry
634
+ # MASS SPECTROMETRY Reports the exact molecular weight of a protein or part of a protein as determined by mass spectrometric methods; see 3.21.23
635
+ data = "CC -!- MASS SPECTROMETRY: MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29;
636
+ CC NOTE=Ref.1.
637
+ CC -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29;
638
+ CC NOTE=Ref.2."
639
+ sp = Bio::SPTR.new(data)
640
+ assert_equal(["MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.",
641
+ "MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."],
642
+ sp.cc['MASS SPECTROMETRY'])
643
+ assert_equal([{'MW' => '2894.9',
644
+ 'MW_ERR' => '3',
645
+ 'METHOD' => 'MALDI',
646
+ 'RANGE' => '1-29',
647
+ 'NOTE' => 'Ref.1'},
648
+ {'MW' => '2892.2',
649
+ 'METHOD' => 'Electrospray',
650
+ 'MW_ERR' => nil,
651
+ 'RANGE' => '1-29',
652
+ 'NOTE' => 'Ref.2'}],
653
+ sp.cc('MASS SPECTROMETRY'))
654
+ end
655
+
656
+ def test_miscellaneous
657
+ # MISCELLANEOUS Any comment which does not belong to any of the other defined topics
658
+ data = 'CC -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a
659
+ CC mitochondrial one.'
660
+ sp = Bio::SPTR.new(data)
661
+ assert_equal(["There are two isozymes; a cytoplasmic one and a mitochondrial one."],
662
+ sp.cc['MISCELLANEOUS'])
663
+ end
664
+
665
+ def test_pathway
666
+ # PATHWAY Description of the metabolic pathway(s) with which a protein is associated
667
+ data = 'CC -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-
668
+ CC phosphate and glycerone phosphate from D-glucose: step 4.'
669
+ sp = Bio::SPTR.new(data)
670
+ assert_equal(["Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."],
671
+ sp.cc['PATHWAY'])
672
+ assert_equal(["Carbohydrate degradation",
673
+ 'glycolysis',
674
+ 'D-glyceraldehyde 3-phosphate',
675
+ 'glycerone phosphate from D-glucose',
676
+ 'step 4'],
677
+ sp.cc('PATHWAY'))
678
+ end
679
+
680
+ def test_pharmaceutical
681
+ # PHARMACEUTICAL Description of the use of a protein as a pharmaceutical drug
682
+ data = 'CC -!- PHARMACEUTICAL: Available under the names Factrel (Ayerst Labs),
683
+ CC Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm
684
+ CC (Serono). Used in evaluating hypothalamic-pituitary gonadotropic
685
+ CC function.'
686
+ sp = Bio::SPTR.new(data)
687
+ assert_equal(["Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). Used in evaluating hypothalamic-pituitary gonadotropic function."],
688
+ sp.cc['PHARMACEUTICAL'])
689
+ end
690
+
691
+ def test_polymorphism
692
+ # POLYMORPHISM Description of polymorphism(s)
693
+ data = 'CC -!- POLYMORPHISM: Position 161 is associated with platelet-specific
694
+ CC alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161.
695
+ CC Siba is involved in neonatal alloimmune thrombocytopenia (NATP).
696
+ CC -!- POLYMORPHISM: Polymorphisms arise from a variable number of tandem
697
+ CC 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-
698
+ CC like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown
699
+ CC here) contains one repeat starting at position 415, allele C
700
+ CC contains two repeats, allele B contains three repeats and allele A
701
+ CC contains four repeats.'
702
+ sp = Bio::SPTR.new(data)
703
+ assert_equal(["Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP).",
704
+ "Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats."],
705
+ sp.cc['POLYMORPHISM'])
706
+ end
707
+
708
+ def test_ptm
709
+ # PTM Description of any chemical alternation of a polypeptide (proteolytic cleavage, amino acid modifications including crosslinks). This topic complements information given in the feature table or indicates polypeptide modifications for which position-specific data is not available.
710
+ data = 'CC -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked
711
+ CC carbohydrate.
712
+ CC -!- PTM: Palmitoylated.'
713
+ sp = Bio::SPTR.new(data)
714
+ assert_equal(["N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.",
715
+ "Palmitoylated."],
716
+ sp.cc['PTM'])
717
+ end
718
+
719
+ def test_rna_editing
720
+ # RNA EDITING Description of any type of RNA editing that leads to one or more amino acid changes
721
+ data = 'CC -!- RNA EDITING: Modified_positions=50, 59, 78, 87, 104, 132, 139,
722
+ CC 146, 149, 160, 170, 177, 185, 198, 208, 223, 226, 228, 243, 246,
723
+ CC 252, 260, 264, 277, 285, 295; Note=The nonsense codons at
724
+ CC positions 50, 78, 104, 260 and 264 are modified to sense codons.'
725
+
726
+ data = 'CC -!- RNA EDITING: Modified_positions=607; Note=Fully edited in the
727
+ CC brain. Heteromerically expressed edited GLUR2 (R) receptor
728
+ CC complexes are impermeable to calcium, whereas the unedited (Q)
729
+ CC forms are highly permeable to divalent ions (By similarity).'
730
+ sp = Bio::SPTR.new(data)
731
+ assert_equal(["Modified_positions=607; Note=Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."],
732
+ sp.cc['RNA EDITING'])
733
+ assert_equal({"Modified_positions" => ['607'],
734
+ "Note" => "Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."},
735
+ sp.cc('RNA EDITING'))
736
+ end
737
+
738
+ def test_similarity
739
+ # SIMILARITY Description of the similaritie(s) (sequence or structural) of a protein with other proteins
740
+ data = 'CC -!- SIMILARITY: Contains 1 protein kinase domain.
741
+ CC -!- SIMILARITY: Contains 1 RGS domain.'
742
+ sp = Bio::SPTR.new(data)
743
+ assert_equal(["Contains 1 protein kinase domain.", "Contains 1 RGS domain."],
744
+ sp.cc['SIMILARITY'])
745
+ end
746
+
747
+ def test_subcellular_location
748
+ # SUBCELLULAR LOCATION Description of the subcellular location of the mature protein
749
+
750
+ data = 'CC -!- SUBCELLULAR LOCATION: Or: Cytoplasm. Or: Secreted protein. May be
751
+ CC secreted by a non-classical secretory pathway.'
752
+
753
+ data = "CC -!- SUBCELLULAR LOCATION: Cytoplasmic or may be secreted by a non-
754
+ CC classical secretory pathway (By similarity)."
755
+
756
+ data = "CC -!- SUBCELLULAR LOCATION: Cytoplasm. In neurons, axonally transported
757
+ CC to the nerve terminals."
758
+
759
+ data = "CC -!- SUBCELLULAR LOCATION: Cell wall. Probably the external side of the
760
+ CC cell wall."
761
+
762
+ data = "CC -!- SUBCELLULAR LOCATION: Endosome; late endosome; late endosomal
763
+ CC membrane; single-pass type I membrane protein. Lysosome; lysosomal
764
+ CC membrane; single-pass type I membrane protein. Localizes to late
765
+ CC endocytic compartment. Associates with lysosome membranes."
766
+
767
+
768
+ data = "CC -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane;
769
+ CC peripheral membrane protein. Plastid; chloroplast; chloroplast
770
+ CC stroma."
771
+ sp = Bio::SPTR.new(data)
772
+ assert_equal(["Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma."],
773
+ sp.cc['SUBCELLULAR LOCATION'])
774
+ assert_equal([["Plastid",
775
+ "chloroplast",
776
+ "chloroplast membrane",
777
+ "peripheral membrane protein"],
778
+ ["Plastid", "chloroplast",
779
+ "chloroplast stroma"]],
780
+ sp.cc('SUBCELLULAR LOCATION'))
781
+ end
782
+
783
+ def test_subunit
784
+ # SUBUNIT Description of the quaternary structure of a protein and any kind of interactions with other proteins or protein complexes; except for receptor-ligand interactions, which are described in the topic FUNCTION.
785
+
786
+ data = 'CC -!- SUBUNIT: Interacts with BTK. Interacts with all isoforms of MAPK8,
787
+ CC MAPK9, MAPK10 and MAPK12.'
788
+
789
+ data = 'CC -!- SUBUNIT: Homotetramer.'
790
+ sp = Bio::SPTR.new(data)
791
+ assert_equal(["Homotetramer."],
792
+ sp.cc['SUBUNIT'])
793
+ end
794
+
795
+ def test_tissue_specificity
796
+ # TISSUE SPECIFICITY Description of the tissue-specific expression of mRNA or protein
797
+ data = "CC -!- TISSUE SPECIFICITY: Heart, brain and liver mitochondria."
798
+
799
+ data = "CC -!- TISSUE SPECIFICITY: Widely expressed with highest expression in
800
+ CC thymus, testis, embryo and proliferating blood lymphocytes."
801
+
802
+ data = "CC -!- TISSUE SPECIFICITY: Isoform 2 is highly expressed in the brain,
803
+ CC heart, spleen, kidney and blood. Isoform 2 is expressed (at
804
+ CC protein level) in the spleen, skeletal muscle and gastrointestinal
805
+ CC epithelia."
806
+ sp = Bio::SPTR.new(data)
807
+ assert_equal(["Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia."],
808
+ sp.cc['TISSUE SPECIFICITY'])
809
+ end
810
+
811
+ def test_toxic_dose
812
+ # TOXIC DOSE Description of the lethal dose (LD), paralytic dose (PD) or effective dose of a protein
813
+ data = 'CC -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.'
814
+ sp = Bio::SPTR.new(data)
815
+ assert_equal(["LD(50) is 12 mg/kg by intraperitoneal injection."],
816
+ sp.cc['TOXIC DOSE'])
817
+ end
818
+
819
# WEB RESOURCE topic: description of a cross-reference to a network
# database/resource for a specific protein; see 3.21.34.
# Checks both access styles: cc[topic] for the raw comment text and
# cc(topic) for the NAME/URL/NOTE breakdown.
def test_web_resource
  lines = ['CC -!- WEB RESOURCE: NAME=Inherited peripheral neuropathies mutation db;',
           'CC URL="http://www.molgen.ua.ac.be/CMTMutations/".',
           'CC -!- WEB RESOURCE: NAME=Connexin-deafness homepage;',
           'CC URL="http://www.crg.es/deafness/".',
           'CC -!- WEB RESOURCE: NAME=GeneReviews;',
           'CC URL="http://www.genetests.org/query?gene=GJB1".']
  entry = Bio::SPTR.new(lines.join("\n"))

  raw_text = ['NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".',
              'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".',
              'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".']
  assert_equal(raw_text, entry.cc['WEB RESOURCE'])

  parsed = [{'NAME' => "Inherited peripheral neuropathies mutation db",
             'URL' => 'http://www.molgen.ua.ac.be/CMTMutations/', 'NOTE' => nil},
            {'NAME' => "Connexin-deafness homepage",
             'URL' => 'http://www.crg.es/deafness/', 'NOTE' => nil},
            {'NAME' => "GeneReviews",
             'URL' => 'http://www.genetests.org/query?gene=GJB1', 'NOTE' => nil}]
  assert_equal(parsed, entry.cc('WEB RESOURCE'))
end
841
+
842
+ end # class TestSPTRCC
843
+
844
+ # http://br.expasy.org/sprot/userman.html#Ref_line
845
class TestSPTRRef < Test::Unit::TestCase

  # Parses one complete reference block (RN, RP, RC, RX, RG, RA, RT, RL)
  # that every test in this class inspects.
  def setup
    lines = ['RN [1]',
             'RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C), FUNCTION, INTERACTION',
             'RP WITH PKC-3, SUBCELLULAR LOCATION, TISSUE SPECIFICITY, DEVELOPMENTAL',
             'RP STAGE, AND MUTAGENESIS OF PHE-175 AND PHE-221.',
             'RC STRAIN=Bristol N2;',
             'RX PubMed=11134024; DOI=10.1074/jbc.M008990200;',
             'RG The mouse genome sequencing consortium;',
             'RA Galinier A., Bleicher F., Negre D., Perriere G., Duclos B.,',
             'RA Cozzone A.J., Cortay J.-C.;',
             'RT "A novel adapter protein employs a phosphotyrosine binding domain and',
             'RT exceptionally basic N-terminal domains to capture and localize an',
             'RT atypical protein kinase C: characterization of Caenorhabditis elegans',
             'RT C kinase adapter 1, a protein that avidly binds protein kinase C3.";',
             'RL J. Biol. Chem. 276:10463-10475(2001).']
    @obj = SPTR.new(lines.join("\n"))
  end

  # The whole parsed reference, compared as one hash.
  def test_ref
    expected = {
      "RT" => "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.",
      "RL" => "J. Biol. Chem. 276:10463-10475(2001).",
      "RA" => "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.",
      "RX" => {"MEDLINE" => nil,
               "DOI" => "10.1074/jbc.M008990200",
               "PubMed" => "11134024"},
      "RC" => [{"Text" => "Bristol N2", "Token" => "STRAIN"}],
      "RN" => "[1]",
      "RP" => ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
               "FUNCTION",
               "INTERACTION WITH PKC-3",
               "SUBCELLULAR LOCATION",
               "TISSUE SPECIFICITY",
               "DEVELOPMENTAL STAGE",
               "MUTAGENESIS OF PHE-175 AND PHE-221"],
      "RG" => ["The mouse genome sequencing consortium"]
    }
    assert_equal(expected, first_reference)
  end

  def test_RN
    assert_equal("[1]", first_reference['RN'])
  end

  def test_RP
    positions = ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
                 "FUNCTION", "INTERACTION WITH PKC-3",
                 "SUBCELLULAR LOCATION",
                 "TISSUE SPECIFICITY",
                 "DEVELOPMENTAL STAGE",
                 "MUTAGENESIS OF PHE-175 AND PHE-221"]
    assert_equal(positions, first_reference['RP'])
  end

  def test_RC
    comments = [{"Text"=>"Bristol N2", "Token"=>"STRAIN"}]
    assert_equal(comments, first_reference['RC'])
  end

  # MEDLINE is absent from the RX line and is expected to come back as nil.
  def test_RX
    cross_refs = {'MEDLINE' => nil,
                  'PubMed' => '11134024',
                  'DOI' => '10.1074/jbc.M008990200'}
    assert_equal(cross_refs, first_reference['RX'])
  end

  def test_RG
    groups = ["The mouse genome sequencing consortium"]
    assert_equal(groups, first_reference['RG'])
  end

  def test_RA
    authors = "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C."
    assert_equal(authors, first_reference['RA'])
  end

  def test_RT
    title = "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3."
    assert_equal(title, first_reference['RT'])
  end

  def test_RL
    location = "J. Biol. Chem. 276:10463-10475(2001)."
    assert_equal(location, first_reference['RL'])
  end

  private

  # First (and only) reference parsed from the data given in setup.
  def first_reference
    @obj.ref.first
  end

end # class TestSPTRRef
932
+
933
+
934
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0
935
class TestSPTRSwissProtRel41_0 < Test::Unit::TestCase
  # Progress in the conversion of Swiss-Prot to mixed-case characters
  # (no test for this release note item).

  # Multiple RP lines: the RP value continues over two lines and is expected
  # to come back as one item per listed position.
  def test_multiple_RP_lines
    lines = ["RN [1]",
             "RP SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND",
             "RP CHARACTERIZATION."]
    entry = SPTR.new(lines.join("\n"))
    positions = ['SEQUENCE FROM N.A.',
                 'SEQUENCE OF 23-42 AND 351-365',
                 'CHARACTERIZATION']
    assert_equal(positions, entry.ref.first['RP'])
  end
end
950
+
951
+
952
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1
953
class TestSPTRSwissProtRel41_1 < Test::Unit::TestCase
  # New syntax of the CC line topic ALTERNATIVE PRODUCTS.
  # The data lists three Event blocks; the expected hash holds the event
  # "Alternative promoter" together with the two named isoforms.
  def test_alternative_products
    lines = ["ID TEST_ENTRY STANDARD; PRT; 393 AA.",
             "CC -!- ALTERNATIVE PRODUCTS:",
             "CC Event=Alternative promoter;",
             "CC Comment=Free text;",
             "CC Event=Alternative splicing; Named isoforms=2;",
             "CC Comment=Optional free text;",
             "CC Name=Isoform_1; Synonyms=Synonym_1;",
             "CC IsoId=Isoform_identifier_1;",
             "CC Sequence=Displayed;",
             "CC Note=Free text;",
             "CC Name=Isoform_2; Synonyms=Synonym_1, Synonym_2;",
             "CC IsoId=Isoform_identifier_1, Isoform_identifer_2;",
             "CC Sequence=VSP_identifier_1, VSP_identifier_2;",
             "CC Note=Free text;",
             "CC Event=Alternative initiation;",
             "CC Comment=Free text;"]
    entry = SPTR.new(lines.join("\n"))

    isoform_1 = {"Name" => "Isoform_1",
                 "Synonyms" => ["Synonym_1"],
                 "IsoId" => ["Isoform_identifier_1"],
                 "Sequence" => ["Displayed"]}
    isoform_2 = {"Name" => "Isoform_2",
                 "Synonyms" => ["Synonym_1", "Synonym_2"],
                 "IsoId" => ["Isoform_identifier_1", "Isoform_identifer_2"],
                 "Sequence" => ["VSP_identifier_1", "VSP_identifier_2"]}
    expected = {"Comment" => "Free text",
                "Named isoforms" => "2",
                "Variants" => [isoform_1, isoform_2],
                "Event" => ["Alternative promoter"]}
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end

  # ALTERNATIVE PRODUCTS combined with the VARSPLIC feature lines that
  # describe the sequence differences of the isoforms.
  def test_alternative_products_with_ft
    lines = ["ID TEST_ENTRY STANDARD; PRT; 393 AA.",
             "CC -!- ALTERNATIVE PRODUCTS:",
             "CC Event=Alternative splicing; Named isoforms=6;",
             "CC Name=1;",
             "CC IsoId=Q15746-4; Sequence=Displayed;",
             "CC Name=2;",
             "CC IsoId=Q15746-5; Sequence=VSP_000040;",
             "CC Name=3A;",
             "CC IsoId=Q15746-6; Sequence=VSP_000041, VSP_000043;",
             "CC Name=3B;",
             "CC IsoId=Q15746-7; Sequence=VSP_000040, VSP_000041, VSP_000042;",
             "CC Name=4;",
             "CC IsoId=Q15746-8; Sequence=VSP_000041, VSP_000042;",
             "CC Name=del-1790;",
             "CC IsoId=Q15746-9; Sequence=VSP_000044;",
             "FT VARSPLIC 437 506 VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA",
             "FT RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in",
             "FT isoform 2 and isoform 3B).",
             "FT /FTId=VSP_004791.",
             "FT VARSPLIC 1433 1439 DEVEVSD -> MKWRCQT (in isoform 3A,",
             "FT isoform 3B and isoform 4).",
             "FT /FTId=VSP_004792.",
             "FT VARSPLIC 1473 1545 Missing (in isoform 4).",
             "FT /FTId=VSP_004793.",
             "FT VARSPLIC 1655 1705 Missing (in isoform 3A and isoform 3B).",
             "FT /FTId=VSP_004794.",
             "FT VARSPLIC 1790 1790 Missing (in isoform Del-1790).",
             "FT /FTId=VSP_004795."]
    entry = SPTR.new(lines.join("\n"))

    # [IsoId, Name, Sequence] per isoform; none of them lists synonyms.
    isoform_rows = [["Q15746-4", "1", ["Displayed"]],
                    ["Q15746-5", "2", ["VSP_000040"]],
                    ["Q15746-6", "3A", ["VSP_000041", "VSP_000043"]],
                    ["Q15746-7", "3B", ["VSP_000040", "VSP_000041", "VSP_000042"]],
                    ["Q15746-8", "4", ["VSP_000041", "VSP_000042"]],
                    ["Q15746-9", "del-1790", ["VSP_000044"]]]
    variants = isoform_rows.map do |iso_id, name, sequence|
      {"IsoId" => [iso_id],
       "Name" => name,
       "Synonyms" => [],
       "Sequence" => sequence}
    end
    products = {"Comment" => "",
                "Named isoforms" => "6",
                "Variants" => variants,
                "Event" => ["Alternative splicing"]}
    assert_equal(products, entry.cc('ALTERNATIVE PRODUCTS'))

    # Each expected feature repeats its positions, description and FTId
    # inside the "original" array, so build every hash from one row.
    varsplic = lambda do |ft_id, from, to, description, diff|
      {"FTId" => ft_id,
       "From" => from,
       "To" => to,
       "Description" => description,
       "diff" => diff,
       "original" => ["VARSPLIC", from.to_s, to.to_s, description, "/FTId=#{ft_id}."]}
    end
    features = [
      varsplic.call("VSP_004791", 437, 506,
                    "VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).",
                    ["VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKARTRDSGTYSCTASNAQGQVSCSWTLQVER", "G"]),
      varsplic.call("VSP_004792", 1433, 1439,
                    "DEVEVSD -> MKWRCQT (in isoform 3A, isoform 3B and isoform 4).",
                    ["DEVEVSD", "MKWRCQT"]),
      varsplic.call("VSP_004793", 1473, 1545,
                    "Missing (in isoform 4).",
                    [nil, nil]),
      varsplic.call("VSP_004794", 1655, 1705,
                    "Missing (in isoform 3A and isoform 3B).",
                    [nil, nil]),
      varsplic.call("VSP_004795", 1790, 1790,
                    "Missing (in isoform Del-1790).",
                    [nil, nil])
    ]
    assert_equal(features, entry.ft['VARSPLIC'])
  end
end
1079
+
1080
+
1081
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.10
1082
class TestSPTRSwissProtRel41_10 < Test::Unit::TestCase
  # Reference Comment (RC) line topics may span lines.
  # One STRAIN token spread over three RC lines is expected to yield one
  # Text/Token hash per strain, in the order listed.
  def test_RC_lines
    lines = ["RN [1]",
             "RC STRAIN=AZ.026, DC.005, GA.039, GA2181, IL.014, IN.018, KY.172, KY2.37,",
             "RC LA.013, MN.001, MNb027, MS.040, NY.016, OH.036, TN.173, TN2.38,",
             "RC UT.002, AL.012, AZ.180, MI.035, VA.015, and IL2.17;"]
    entry = SPTR.new(lines.join("\n"))

    strains = %w[AZ.026 DC.005 GA.039 GA2181 IL.014 IN.018 KY.172 KY2.37
                 LA.013 MN.001 MNb027 MS.040 NY.016 OH.036 TN.173 TN2.38
                 UT.002 AL.012 AZ.180 MI.035 VA.015 IL2.17]
    expected = strains.map { |strain| {"Text" => strain, "Token" => "STRAIN"} }
    assert_equal(expected, entry.ref.first['RC'])
  end
end
1115
+
1116
+
1117
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.20
1118
class TestSPTRSwissProtRel41_20 < Test::Unit::TestCase
  # Case and wording change for submissions to Swiss-Prot in reference
  # location (RL) lines. The assertion expects an empty RL value for a lone
  # RL line.
  def test_RL_lines
    entry = SPTR.new("RL Submitted (MAY-2002) to the SWISS-PROT data bank.")
    assert_equal('', entry.ref.first['RL'])
  end

  # New comment line (CC) topic ALLERGEN; the three CC lines are expected to
  # be joined into one text.
  def test_CC_allergen
    lines = ["CC -!- ALLERGEN: Causes an allergic reaction in human. Binds IgE. It is a",
             "CC partially heat-labile allergen that may cause both respiratory and",
             "CC food-allergy symptoms in patients with the bird-egg syndrome."]
    entry = SPTR.new(lines.join("\n"))
    expected = ["Causes an allergic reaction in human. Binds IgE. It is a partially heat-labile allergen that may cause both respiratory and food-allergy symptoms in patients with the bird-egg syndrome."]
    assert_equal(expected, entry.cc("ALLERGEN"))
  end
end
1137
+
1138
+
1139
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel42.6
1140
class TestSPTRSwissProtRel42_6 < Test::Unit::TestCase
  # New comment line (CC) topic RNA EDITING.
  # Three shapes are checked in turn: positions only, positions plus a
  # multi-line note, and "Not_applicable" plus a note.
  def test_CC_rna_editing
    positions_only = SPTR.new("CC -!- RNA EDITING: Modified_positions=393, 431, 452, 495.")
    assert_equal({"Note"=>"",
                  "Modified_positions"=>['393', '431', '452', '495']},
                 positions_only.cc("RNA EDITING"))

    noted_lines = ["CC -!- RNA EDITING: Modified_positions=59, 78, 94, 98, 102, 121; Note=The",
                   "CC stop codon at position 121 is created by RNA editing. The nonsense",
                   "CC codon at position 59 is modified to a sense codon."]
    with_note = SPTR.new(noted_lines.join("\n"))
    assert_equal({"Note"=>"The stop codon at position 121 is created by RNA editing. The nonsense codon at position 59 is modified to a sense codon.",
                  "Modified_positions"=>['59', '78', '94', '98', '102', '121']},
                 with_note.cc("RNA EDITING"))

    na_lines = ["CC -!- RNA EDITING: Modified_positions=Not_applicable; Note=Some",
                "CC positions are modified by RNA editing via nucleotide insertion or",
                "CC deletion. The initiator methionine is created by RNA editing."]
    not_applicable = SPTR.new(na_lines.join("\n"))
    assert_equal({'Modified_positions' => ['Not_applicable'],
                  'Note' => "Some positions are modified by RNA editing via nucleotide insertion or deletion. The initiator methionine is created by RNA editing."},
                 not_applicable.cc("RNA EDITING"))
  end
end
1166
+
1167
+
1168
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel1_12
1169
class TestSPTRUniProtRel1_12 < Test::Unit::TestCase
  # Digital Object Identifier (DOI) in the RX line
  # RX [MEDLINE=Medline_identifier; ][PubMed=Pubmed_identifier; ][DOI=Digital_object_identifier;]
  def test_DOI_in_RX_line
    # The leading empty element keeps the newline the data starts with.
    lines = ["",
             "RN [1]",
             "RX MEDLINE=97291283; PubMed=9145897; DOI=10.1007/s00248-002-2038-4;"]
    entry = SPTR.new(lines.join("\n"))
    expected = {'MEDLINE' => '97291283',
                'PubMed' => '9145897',
                'DOI' => '10.1007/s00248-002-2038-4'}
    assert_equal(expected, entry.ref.first['RX'])
  end

  # New line type: RG (Reference Group); two RG lines give two group names.
  def test_RG_line
    # The leading empty element keeps the newline the data starts with.
    lines = ["",
             "RN [1]",
             "RG The C. elegans sequencing consortium;",
             "RG The Brazilian network for HIV isolation and characterization;"]
    entry = SPTR.new(lines.join("\n"))
    expected = ['The C. elegans sequencing consortium',
                'The Brazilian network for HIV isolation and characterization']
    assert_equal(expected, entry.ref.first['RG'])
  end
end
1195
+
1196
+
1197
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_0
1198
class TestSPTRUniProtRel2_0 < Test::Unit::TestCase
  # New format for the GN (Gene Name) line
  # GN Name=<name>; Synonyms=<name1>[, <name2>...]; OrderedLocusNames=<name1>[, <name2>...];
  # GN ORFNames=<name1>[, <name2>...];
  #
  # Covers a single gene with synonyms and loci, an ORF-only gene, and two
  # genes separated by a "GN and" line.
  def test_GN_line
    single_gene = ["GN Name=atpG; Synonyms=uncG, papC;",
                   "GN OrderedLocusNames=b3733, c4659, z5231, ECs4675, SF3813, S3955;"]
    entry = SPTR.new(single_gene.join("\n"))
    assert_equal([{:orfs => [],
                   :loci => ["b3733", "c4659", "z5231", "ECs4675", "SF3813", "S3955"],
                   :name => "atpG",
                   :synonyms => ["uncG", "papC"]}],
                 entry.gn)

    entry = SPTR.new("GN ORFNames=SPAC1834.11c;")
    assert_equal([{:orfs => ['SPAC1834.11c'],
                   :loci => [],
                   :name => '',
                   :synonyms => []}],
                 entry.gn)

    two_genes = ["GN Name=cysA1; Synonyms=cysA; OrderedLocusNames=Rv3117, MT3199;",
                 "GN ORFNames=MTCY164.27;",
                 "GN and",
                 "GN Name=cysA2; OrderedLocusNames=Rv0815c, MT0837; ORFNames=MTV043.07c;"]
    entry = SPTR.new(two_genes.join("\n"))
    assert_equal([{:orfs => ["MTCY164.27"],
                   :loci => ["Rv3117", "MT3199"],
                   :name => "cysA1",
                   :synonyms => ["cysA"]},
                  {:orfs => ["MTV043.07c"],
                   :loci => ["Rv0815c", "MT0837"],
                   :name => "cysA2",
                   :synonyms => []}],
                 entry.gn)
  end
end
1236
+
1237
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_1
1238
class TestSPTRUniProtRel2_1 < Test::Unit::TestCase
  # Format change in the comment line (CC) topic: MASS SPECTROMETRY.
  # MW_ERR is not given in the data and is expected to be nil.
  def test_CC_mass_spectrometry
    lines = ["CC -!- MASS SPECTROMETRY: MW=32875.93; METHOD=MALDI;",
             "CC RANGE=1-284 (Isoform 3); NOTE=Ref.6."]
    entry = SPTR.new(lines.join("\n"))
    measurement = {"RANGE"=>"1-284",
                   "METHOD"=>"MALDI",
                   "MW_ERR"=>nil,
                   "NOTE"=>"Ref.6",
                   "MW"=>"32875.93"}
    assert_equal([measurement], entry.cc("MASS SPECTROMETRY"))
  end
end
1252
+
1253
+
1254
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_3
1255
class TestSPTRUniProtRel2_3 < Test::Unit::TestCase
  # New RL line structure for electronic publications.
  # The assertion expects an empty RL value for a lone RL line.
  def test_RL_line
    entry = SPTR.new("RL Submitted (XXX-YYYY) to the HIV data bank.")
    assert_equal('', entry.ref.first['RL'])
  end

  # Format change in the cross-reference to PDB: the chain/range field is
  # expected back as one string without the trailing period.
  def test_DR_PDB
    entry = SPTR.new("DR PDB; 1NB3; X-ray; A/B/C/D=116-335, P/R/S/T=98-105.")
    pdb_refs = [["1NB3", "X-ray", "A/B/C/D=116-335, P/R/S/T=98-105"]]
    assert_equal(pdb_refs, entry.dr['PDB'])
  end
end
1272
+
1273
+
1274
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_4
1275
class TestSPTRUniProtRel3_4 < Test::Unit::TestCase
  # Changes in the RP (Reference Position) line
  def test_RP_line
    # The leading empty element keeps the newline the data starts with.
    long_rp = ["",
               "RN [1]",
               "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1), PROTEIN SEQUENCE",
               "RP OF 108-131; 220-231 AND 349-393, CHARACTERIZATION, AND MUTAGENESIS OF",
               "RP ARG-336."]
    entry = SPTR.new(long_rp.join("\n"))
    assert_equal(['NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1)',
                  'PROTEIN SEQUENCE OF 108-131; 220-231 AND 349-393',
                  'CHARACTERIZATION',
                  'MUTAGENESIS OF ARG-336'],
                 entry.ref.first['RP'])

    short_rp = ["",
                "RN [1]",
                "RP NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]."]
    entry = SPTR.new(short_rp.join("\n"))
    assert_equal(['NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]'],
                 entry.ref.first['RP'])
  end


  # New comment line (CC) topic: BIOPHYSICOCHEMICAL PROPERTIES.
  # Every expected hash carries all five sub-topics; those missing from the
  # data are expected as "" or {}.
  def test_CC_biophysiochemical_properties
    absorption = ["CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
                  "CC Absorption:",
                  "CC Abs(max)=395 nm;",
                  "CC Note=Exhibits a smaller absorbance peak at 470 nm. The",
                  "CC fluorescence emission spectrum peaks at 509 nm with a shoulder",
                  "CC at 540 nm;"]
    entry = SPTR.new(absorption.join("\n"))
    assert_equal({"Redox potential" => "",
                  "Temperature dependence" => "",
                  "Kinetic parameters" => {},
                  "Absorption" => {"Note" => "Exhibits a smaller absorbance peak at 470 nm. The fluorescence emission spectrum peaks at 509 nm with a shoulder at 540 nm",
                                   "Abs(max)" => "395 nm"},
                  "pH dependence" => ""},
                 entry.cc("BIOPHYSICOCHEMICAL PROPERTIES"))

    kinetics = ["CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
                "CC Kinetic parameters:",
                "CC KM=62 mM for glucose;",
                "CC KM=90 mM for maltose;",
                "CC Vmax=0.20 mmol/min/mg enzyme with glucose as substrate;",
                "CC Vmax=0.11 mmol/min/mg enzyme with maltose as substrate;",
                "CC Note=Acetylates glucose, maltose, mannose, galactose, and",
                "CC fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07,",
                "CC 0.04;"]
    entry = SPTR.new(kinetics.join("\n"))
    assert_equal({"Redox potential" => "",
                  "Temperature dependence" => "",
                  "Kinetic parameters" => {"KM" => "62 mM for glucose; KM=90 mM for maltose",
                                           "Note" => "Acetylates glucose, maltose, mannose, galactose, and fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07, 0.04",
                                           "Vmax" => "0.20 mmol/min/mg enzyme with glucose as substrate"},
                  "Absorption" => {},
                  "pH dependence" => ""},
                 entry.cc("BIOPHYSICOCHEMICAL PROPERTIES"))

    dependences = ["CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
                   "CC Kinetic parameters:",
                   "CC KM=1.76 uM for chlorophyll;",
                   "CC pH dependence:",
                   "CC Optimum pH is 7.5. Active from pH 5.0 to 9.0;",
                   "CC Temperature dependence:",
                   "CC Optimum temperature is 45 degrees Celsius. Active from 30 to 60",
                   "CC degrees Celsius;"]
    entry = SPTR.new(dependences.join("\n"))
    assert_equal({"Redox potential" => "",
                  "Temperature dependence" => "Optimum temperature is 45 degrees Celsius. Active from 30 to 60 degrees Celsius",
                  "Kinetic parameters" => {},
                  "Absorption" => {},
                  "pH dependence" => "Optimum pH is 7.5. Active from pH 5.0 to 9.0"},
                 entry.cc("BIOPHYSICOCHEMICAL PROPERTIES"))
  end
end
1352
+
1353
+
1354
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_5
1355
class TestSPTRUniProtRel3_5 < Test::Unit::TestCase
  # Extension of the Swiss-Prot entry name format.
  # Placeholder only: the body is still to be written (TBD), so the test
  # makes no assertions yet.
  def test_entry_name_format
  end
end
1361
+
1362
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel4_0
1363
class TestSPTRUniProtRel4_0 < Test::Unit::TestCase
  # Extension of the TrEMBL entry name format

  # Change of the entry name in many Swiss-Prot entries

  # New comment line (CC) topic: INTERACTION
  def test_CC_interaction
    data = ["CC -!- INTERACTION:",
            "CC P11450:fcp3c; NbExp=1; IntAct=EBI-126914, EBI-159556;"].join("\n")
    expected = [{"SP_Ac" => "P11450",
                 "identifier" => "fcp3c",
                 "optional_identifier" => nil,
                 "NbExp" => "1",
                 "IntAct" => ["EBI-126914", "EBI-159556"]}]
    assert_equal(expected, interactions_in(data))
  end

  # The accession carries an isoform suffix ("-1").
  def test_CC_interaction_isoform
    data = ["CC -!- INTERACTION:",
            "CC Q9W1K5-1:cg11299; NbExp=1; IntAct=EBI-133844, EBI-212772;"].join("\n")
    expected = [{"SP_Ac" => 'Q9W1K5-1',
                 "identifier" => 'cg11299',
                 "optional_identifier" => nil,
                 "NbExp" => "1",
                 "IntAct" => ["EBI-133844", "EBI-212772"]}]
    assert_equal(expected, interactions_in(data))
  end

  # A dash stands in for the missing gene name.
  def test_CC_interaction_no_gene_name
    data = ["CC -!- INTERACTION:",
            "CC Q8NI08:-; NbExp=1; IntAct=EBI-80809, EBI-80799;"].join("\n")
    expected = [{"SP_Ac" => 'Q8NI08',
                 "identifier" => '-',
                 "optional_identifier" => nil,
                 "NbExp" => "1",
                 "IntAct" => ["EBI-80809", "EBI-80799"]}]
    assert_equal(expected, interactions_in(data))
  end

  # "Self" is expected to resolve to the entry name taken from the ID line.
  def test_CC_interaction_self_association
    data = ["ID TEST_ENTRY STANDARD; PRT; 393 AA.",
            "CC -!- INTERACTION:",
            "CC Self; NbExp=1; IntAct=EBI-123485, EBI-123485;"].join("\n")
    expected = [{"SP_Ac" => 'TEST_ENTRY',
                 "identifier" => 'TEST_ENTRY',
                 "optional_identifier" => nil,
                 "NbExp" => "1",
                 "IntAct" => ["EBI-123485", "EBI-123485"]}]
    assert_equal(expected, interactions_in(data))
  end

  # The "(xeno)" marker is expected in optional_identifier.
  def test_CC_interaction_The_source_organisms_of_the_interacting_proteins_are_different
    data = ["CC -!- INTERACTION:",
            "CC Q8C1S0:2410018m14rik (xeno); NbExp=1; IntAct=EBI-394562, EBI-398761;"].join("\n")
    expected = [{"SP_Ac" => 'Q8C1S0',
                 "identifier" => '2410018m14rik',
                 "optional_identifier" => '(xeno)',
                 "NbExp" => "1",
                 "IntAct" => ["EBI-394562", "EBI-398761"]}]
    assert_equal(expected, interactions_in(data))
  end

  # Two lines naming the same partner are expected as two separate items.
  def test_CC_interaction_Different_isoforms_of_the_current_protein_are_shown_to_interact_with_the_same_protein
    data = ["CC -!- INTERACTION:",
            "CC P51617:irak1; NbExp=1; IntAct=EBI-448466, EBI-358664;",
            "CC P51617:irak1; NbExp=1; IntAct=EBI-448472, EBI-358664;"].join("\n")
    expected = [{"SP_Ac" => "P51617",
                 "identifier" => "irak1",
                 "optional_identifier" => nil,
                 "NbExp" => "1",
                 "IntAct" => ["EBI-448466", "EBI-358664"]},
                {"SP_Ac" => "P51617",
                 "identifier" => "irak1",
                 "optional_identifier" => nil,
                 "NbExp" => "1",
                 "IntAct" => ["EBI-448472", "EBI-358664"]}]
    assert_equal(expected, interactions_in(data))
  end

  private

  # Parses +data+ as an entry and returns its parsed INTERACTION comments.
  def interactions_in(data)
    SPTR.new(data).cc("INTERACTION")
  end
end
1448
+
1449
+
1450
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_0
1451
class TestSPTRUniProtRel5_0 < Test::Unit::TestCase
  # Format change in the DR line
  # DR DATABASE_IDENTIFIER; PRIMARY_IDENTIFIER; SECONDARY_IDENTIFIER[; TERTIARY_IDENTIFIER][; QUATERNARY_IDENTIFIER].
  #
  # Checks dr[db] (arrays of fields) and dr(db) (hashes keyed by field name).
  def test_DR_line
    # The leading empty element keeps the newline the data starts with.
    lines = ["",
             "DR EMBL; M68939; AAA26107.1; -; Genomic_DNA.",
             "DR EMBL; U56386; AAB72034.1; -; mRNA."]
    entry = SPTR.new(lines.join("\n"))

    field_lists = [["M68939", "AAA26107.1", "-", "Genomic_DNA"],
                   ["U56386", "AAB72034.1", "-", "mRNA"]]
    assert_equal(field_lists, entry.dr['EMBL'])

    genomic = {" "=>"-",
               "Version"=>"AAA26107.1",
               "Accession"=>"M68939",
               "Molecular Type"=>"Genomic_DNA"}
    messenger = {" "=>"-",
                 "Version"=>"AAB72034.1",
                 "Accession"=>"U56386",
                 "Molecular Type"=>"mRNA"}
    assert_equal([genomic, messenger], entry.dr('EMBL'))
  end
  # New feature (FT) keys and redefinition of existing FT keys
end
1477
+
1478
+
1479
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_4
1480
class TestSPTRUniProtRel5_4 < Test::Unit::TestCase
  # Multiple comment line (CC) topics COFACTOR.
  # Both cc[topic] and cc(topic) are expected to list the two cofactor
  # texts in input order.
  def test_multiple_cofactors
    lines = ["CC -!- COFACTOR: Binds 1 2Fe-2S cluster per subunit (By similarity).",
             "CC -!- COFACTOR: Binds 1 Fe(2+) ion per subunit (By similarity)."]
    entry = SPTR.new(lines.join("\n"))

    by_index = ["Binds 1 2Fe-2S cluster per subunit (By similarity).",
                "Binds 1 Fe(2+) ion per subunit (By similarity)."]
    assert_equal(by_index, entry.cc['COFACTOR'])

    by_call = ["Binds 1 2Fe-2S cluster per subunit (By similarity).",
               "Binds 1 Fe(2+) ion per subunit (By similarity)."]
    assert_equal(by_call, entry.cc('COFACTOR'))
  end
end
268
- end
1494
+
1495
+
1496
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_0
1497
class TestSPTRUniProtRel6_0 < Test::Unit::TestCase
  # Changes in the OG (OrGanelle) line.
  # Each OG line is parsed on its own and compared with the list of
  # organelle terms expected from it.
  def test_OG_line
    cases = [["OG Plastid.", ['Plastid']],
             ["OG Plastid; Apicoplast.", ['Plastid', 'Apicoplast']],
             ["OG Plastid; Chloroplast.", ['Plastid', 'Chloroplast']],
             ["OG Plastid; Cyanelle.", ['Plastid', 'Cyanelle']],
             ["OG Plastid; Non-photosynthetic plastid.", ['Plastid', 'Non-photosynthetic plastid']]]
    cases.each do |og_line, organelles|
      entry = SPTR.new(og_line)
      assert_equal(organelles, entry.og)
    end
  end
end
1521
+
1522
+
1523
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_1
1524
class TestSPTRUniProtRel6_1 < Test::Unit::TestCase
  # Annotation changes concerning the feature key METAL.
  # The old style names both metals in one feature; the new style uses one
  # METAL feature per metal at the same position.
  def test_FT_metal
    # Expected feature hash for a METAL at position 61 with the given text.
    metal_at_61 = lambda do |description|
      {'From' => 61,
       'To' => 61,
       'Description' => description,
       'FTId' => '',
       'diff' => [],
       'original' => ["METAL", "61", "61", description, ""]}
    end

    old_style = SPTR.new("FT METAL 61 61 Copper and zinc.")
    assert_equal([metal_at_61.call('Copper and zinc.')],
                 old_style.ft['METAL'])

    new_lines = ["FT METAL 61 61 Copper.",
                 "FT METAL 61 61 Zinc."]
    new_style = SPTR.new(new_lines.join("\n"))
    assert_equal([metal_at_61.call("Copper."), metal_at_61.call("Zinc.")],
                 new_style.ft['METAL'])
  end
end
1555
+
1556
+
1557
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_5
1558
class TestSPTRUniProtRel6_5 < Test::Unit::TestCase
  # Changes in the keywlist.txt file
  # * Modification of the HI line format:
  #     HI Category: Keyword_1; ...; Keyword_n; Described_Keyword.
  #   The first term listed in an HI line is a category. It is followed by a
  #   hierarchical list of keywords of that category and ends with the
  #   described keyword. There can be more than one HI line of the same
  #   category in one keyword entry.
  def test_HI_line
    lines = ["HI Molecular function: Ionic channel; Calcium channel.",
             "HI Biological process: Transport; Ion transport; Calcium transport; Calcium channel.",
             "HI Ligand: Calcium; Calcium channel."]
    entry = SPTR.new(lines.join("\n"))

    hierarchy = [{'Category' => 'Molecular function',
                  'Keywords' => ['Ionic channel'],
                  'Keyword' => 'Calcium channel'},
                 {'Category' => 'Biological process',
                  'Keywords' => ['Transport', 'Ion transport', 'Calcium transport'],
                  'Keyword' => 'Calcium channel'},
                 {'Category' => 'Ligand',
                  'Keywords' => ['Calcium'],
                  'Keyword' => 'Calcium channel'}]
    assert_equal(hierarchy, entry.hi)
  end
end
1580
+
1581
+
1582
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.0
1583
class TestSPTRUniProtRel7_0 < Test::Unit::TestCase
  # Changes concerning dates and versions numbers (DT lines).
  # The three DT lines are expected under "created", "sequence" and
  # "annotation", for a Swiss-Prot and for a TrEMBL style entry.
  def test_DT_line
    swissprot_lines = ["DT 01-JAN-1998, integrated into UniProtKB/Swiss-Prot.",
                       "DT 15-OCT-2001, sequence version 3.",
                       "DT 01-APR-2004, entry version 14."]
    entry = SPTR.new(swissprot_lines.join("\n"))
    assert_equal({"sequence" => "15-OCT-2001, sequence version 3.",
                  "annotation" => "01-APR-2004, entry version 14.",
                  "created" => "01-JAN-1998, integrated into UniProtKB/Swiss-Prot."},
                 entry.dt)

    trembl_lines = ["DT 01-FEB-1999, integrated into UniProtKB/TrEMBL.",
                    "DT 15-OCT-2000, sequence version 2.",
                    "DT 15-DEC-2004, entry version 5."]
    entry = SPTR.new(trembl_lines.join("\n"))
    assert_equal({"sequence" => "15-OCT-2000, sequence version 2.",
                  "annotation" => "15-DEC-2004, entry version 5.",
                  "created" => "01-FEB-1999, integrated into UniProtKB/TrEMBL."},
                 entry.dt)
  end

  # Addition of a feature (FT) key CHAIN over the whole sequence length

  # Changes concerning the copyright statement: the boxed notice is expected
  # to produce no CC topics at all.
  def test_CC_copyright_statement
    lines = ["CC -----------------------------------------------------------------------",
             "CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms",
             "CC Distributed under the Creative Commons Attribution-NoDerivs License",
             "CC -----------------------------------------------------------------------"]
    entry = SPTR.new(lines.join("\n"))
    assert_equal({}, entry.cc)
  end
end
1617
+
1618
+
1619
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.6
1620
class TestSPTRUniProtRel7_6 < Test::Unit::TestCase
  # Sequences with over 10000 amino acids in UniProtKB/Swiss-Prot.
  #
  # Builds an entry whose sequence block repeats one 60-residue line 200
  # times and checks that the parsed sequence holds all 12000 residues.
  # Entry id noted by the test author: Q09165 (not used by the assertion).
  def test_10000aa
    data = ["SQ SEQUENCE 393 AA; 43653 MW; AD5C149FD8106131 CRC64;\n",
            " MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP\n" * 200,
            "//\n"].join
    sp = SPTR.new(data)
    # assert_equal rather than assert: assert(12000, size) would treat 12000
    # as the value under test and the size as the failure message, so the
    # sequence length would never actually be compared.
    assert_equal(12000, sp.seq.size)
  end
end
1631
+
1632
+
1633
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel8.0
1634
class TestSPTRUniProtRel8_0 < Test::Unit::TestCase
  # Replacement of the feature key VARSPLIC by VAR_SEQ
  #
  # Parses a lone two-line VAR_SEQ feature and checks the list returned by
  # SPTR#ft('VAR_SEQ').
  def test_FT_VER_SEQ
    entry = SPTR.new(var_seq_lines.join("\n"))
    assert_equal(expected_var_seq_features, entry.ft('VAR_SEQ'))
  end


  # Syntax modification of the comment line (CC) topic ALTERNATIVE PRODUCTS
  #
  # Parses an ALTERNATIVE PRODUCTS comment followed by the VAR_SEQ feature it
  # refers to, then checks both SPTR#cc("ALTERNATIVE PRODUCTS") and
  # SPTR#ft("VAR_SEQ").
  def test_CC_alternative_products
    # CC -!- ALTERNATIVE PRODUCTS:
    # CC Event=Event(, Event)*; Named isoforms=Number_of_isoforms;
    # (CC Comment=Free_text;)?
    # (CC Name=Isoform_name;( Synonyms=Synonym(, Synonym)*;)?
    # CC IsoId=Isoform_identifier(, Isoform_identifier)*;
    # CC Sequence=(Displayed|External|Not described|Feature_identifier(, Feature_identifier)*);
    # (CC Note=Free_text;)?)+
    # Note: Variable values are represented in italics. Perl-style
    # multipliers indicate whether a pattern (as delimited by parentheses)
    # is optional (?), may occur 0 or more times (*), or 1 or more times (+).
    # Alternative values are separated by a pipe symbol (|).

    comment_lines = [
      "CC -!- ALTERNATIVE PRODUCTS:",
      "CC Event=Alternative splicing, Alternative initiation; Named isoforms=3;",
      "CC Comment=Isoform 1 and isoform 2 arise due to the use of two",
      "CC alternative first exons joined to a common exon 2 at the same",
      "CC acceptor site but in different reading frames, resulting in two",
      "CC completely different isoforms;",
      "CC Name=1; Synonyms=p16INK4a;",
      "CC IsoId=O77617-1; Sequence=Displayed;",
      "CC Name=3;",
      "CC IsoId=O77617-2; Sequence=VSP_004099;",
      "CC Note=Produced by alternative initiation at Met-35 of isoform 1;",
      "CC Name=2; Synonyms=p19ARF;",
      "CC IsoId=O77618-1; Sequence=External;"
    ]
    entry = SPTR.new((comment_lines + var_seq_lines).join("\n"))

    expected_products = {
      "Event" => ["Alternative splicing", "Alternative initiation"],
      "Named isoforms" => "3",
      "Comment" => "Isoform 1 and isoform 2 arise due to the use of two alternative first exons joined to a common exon 2 at the same acceptor site but in different reading frames, resulting in two completely different isoforms",
      "Variants" => [
        isoform("1", ["p16INK4a"], ["O77617-1"], ["Displayed"]),
        isoform("3", [], ["O77617-2"], ["VSP_004099"]),
        isoform("2", ["p19ARF"], ["O77618-1"], ["External"])
      ]
    }
    assert_equal(expected_products, entry.cc("ALTERNATIVE PRODUCTS"))
    assert_equal(expected_var_seq_features, entry.ft("VAR_SEQ"))
  end


  # Replacement of the comment line (CC) topic DATABASE by WEB RESOURCE
  #
  # Placeholder: only records the old and new line syntax; nothing is parsed.
  def test_CC_web_resource
    # CC -!- DATABASE: NAME=ResourceName[; NOTE=FreeText][; WWW=WWWAddress][; FTP=FTPAddress].
    # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText]; URL=WWWAddress.
    # The length of these lines may exceed 75 characters because long URL
    # addresses are not wrapped into multiple lines.
    assert(true)
  end

  # Introduction of the new line type OH (Organism Host) for viral hosts
  #
  # Parses thirteen OH lines and expects SPTR#oh to return one Hash per line,
  # with 'NCBI_TaxID' and 'HostName' keys, in the order of the input lines.
  def test_OH_lines
    flat_file = [
      'OS Tomato black ring virus (strain E) (TBRV).',
      'OC Viruses; ssRNA positive-strand viruses, no DNA stage; Comoviridae;',
      'OC Nepovirus; Subgroup B.',
      'OX NCBI_TaxID=12277;',
      'OH NCBI_TaxID=4681; Allium porrum (Leek).',
      'OH NCBI_TaxID=4045; Apium graveolens (Celery).',
      'OH NCBI_TaxID=161934; Beta vulgaris (Sugar beet).',
      'OH NCBI_TaxID=38871; Fraxinus (ash trees).',
      'OH NCBI_TaxID=4236; Lactuca sativa (Garden lettuce).',
      'OH NCBI_TaxID=4081; Lycopersicon esculentum (Tomato).',
      'OH NCBI_TaxID=39639; Narcissus pseudonarcissus (Daffodil).',
      'OH NCBI_TaxID=3885; Phaseolus vulgaris (Kidney bean) (French bean).',
      'OH NCBI_TaxID=35938; Robinia pseudoacacia (Black locust).',
      'OH NCBI_TaxID=23216; Rubus (bramble).',
      'OH NCBI_TaxID=4113; Solanum tuberosum (Potato).',
      'OH NCBI_TaxID=13305; Tulipa.',
      'OH NCBI_TaxID=3603; Vitis.'
    ].join("\n")

    # [taxonomy id, host name] pairs, one per OH line above.
    hosts = [
      ['4681',   'Allium porrum (Leek)'],
      ['4045',   'Apium graveolens (Celery)'],
      ['161934', 'Beta vulgaris (Sugar beet)'],
      ['38871',  'Fraxinus (ash trees)'],
      ['4236',   'Lactuca sativa (Garden lettuce)'],
      ['4081',   'Lycopersicon esculentum (Tomato)'],
      ['39639',  'Narcissus pseudonarcissus (Daffodil)'],
      ['3885',   'Phaseolus vulgaris (Kidney bean) (French bean)'],
      ['35938',  'Robinia pseudoacacia (Black locust)'],
      ['23216',  'Rubus (bramble)'],
      ['4113',   'Solanum tuberosum (Potato)'],
      ['13305',  'Tulipa'],
      ['3603',   'Vitis']
    ]
    expected_hosts = hosts.map do |tax_id, host_name|
      {'NCBI_TaxID' => tax_id, 'HostName' => host_name}
    end

    entry = SPTR.new(flat_file)
    assert_equal(expected_hosts, entry.oh)
  end

  # An OH line whose TaxID field reads "23216x:" must make SPTR#oh raise
  # ArgumentError.
  def test_OH_line_exception
    malformed = ["ID TEST_ENTRY STANDARD; PRT; 393 AA.",
                 "OH NCBI_TaxID=23216x: Rubus (bramble)."].join("\n")
    entry = SPTR.new(malformed)
    assert_raise(ArgumentError) { entry.oh }
  end

  private

  # The two FT lines describing the VAR_SEQ feature used by several tests.
  # NOTE(review): column padding inside these lines looks collapsed in this
  # view of the file; confirm against the upstream fixture.
  def var_seq_lines
    ["FT VAR_SEQ 1 34 Missing (in isoform 3).",
     "FT /FTId=VSP_004099."]
  end

  # The feature list the tests expect SPTR#ft('VAR_SEQ') to return for
  # var_seq_lines.
  def expected_var_seq_features
    [{'From' => 1,
      'To' => 34,
      'Description' => 'Missing (in isoform 3).',
      'diff' => ['', nil],
      'FTId' => 'VSP_004099',
      'original' => ["VAR_SEQ", "1", "34", "Missing (in isoform 3).",
                     "/FTId=VSP_004099."]}]
  end

  # Builds one expected entry of the "Variants" list.
  def isoform(name, synonyms, iso_ids, sequence)
    {"IsoId" => iso_ids,
     "Name" => name,
     "Synonyms" => synonyms,
     "Sequence" => sequence}
  end
end
1759
+
1760
class TestOSLine < Test::Unit::TestCase
  # A species name starting with a lowercase letter must come back unchanged
  # in the 'os' field of the first OS record.
  def test_uncapitalized_letter_Q32725_9POAL
    assert_equal('unknown cyperaceous sp.',
                 first_os_name("OS unknown cyperaceous sp.\n"))
  end

  # A species name containing a period before its end must come back whole,
  # including the text after the inner period and the final period.
  def test_period_trancation_O63147
    assert_equal('Hippotis sp. Clark and Watts 825.',
                 first_os_name("OS Hippotis sp. Clark and Watts 825.\n"))
  end

  private

  # Parses +os_line+ with SPTR and returns the 'os' value of the first
  # element of SPTR#os.
  def first_os_name(os_line)
    SPTR.new(os_line).os.first['os']
  end
end
1773
+
1774
+ end # module Bio
1775
+