wwood-bioruby 1.2.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (386) hide show
  1. data/README.rdoc +205 -0
  2. data/README_DEV.rdoc +285 -0
  3. data/VERSION.yml +4 -0
  4. data/bin/bioruby +44 -0
  5. data/bin/br_biofetch.rb +47 -0
  6. data/bin/br_bioflat.rb +293 -0
  7. data/bin/br_biogetseq.rb +45 -0
  8. data/bin/br_pmfetch.rb +421 -0
  9. data/lib/bio.rb +306 -0
  10. data/lib/bio/alignment.rb +2518 -0
  11. data/lib/bio/appl/bl2seq/report.rb +334 -0
  12. data/lib/bio/appl/blast.rb +505 -0
  13. data/lib/bio/appl/blast/ddbj.rb +142 -0
  14. data/lib/bio/appl/blast/format0.rb +1438 -0
  15. data/lib/bio/appl/blast/format8.rb +83 -0
  16. data/lib/bio/appl/blast/genomenet.rb +263 -0
  17. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  18. data/lib/bio/appl/blast/remote.rb +105 -0
  19. data/lib/bio/appl/blast/report.rb +767 -0
  20. data/lib/bio/appl/blast/rexml.rb +144 -0
  21. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  22. data/lib/bio/appl/blast/wublast.rb +635 -0
  23. data/lib/bio/appl/blast/xmlparser.rb +236 -0
  24. data/lib/bio/appl/blat/report.rb +530 -0
  25. data/lib/bio/appl/clustalw.rb +219 -0
  26. data/lib/bio/appl/clustalw/report.rb +152 -0
  27. data/lib/bio/appl/emboss.rb +203 -0
  28. data/lib/bio/appl/fasta.rb +235 -0
  29. data/lib/bio/appl/fasta/format10.rb +325 -0
  30. data/lib/bio/appl/gcg/msf.rb +212 -0
  31. data/lib/bio/appl/gcg/seq.rb +195 -0
  32. data/lib/bio/appl/genscan/report.rb +552 -0
  33. data/lib/bio/appl/hmmer.rb +126 -0
  34. data/lib/bio/appl/hmmer/report.rb +683 -0
  35. data/lib/bio/appl/iprscan/report.rb +374 -0
  36. data/lib/bio/appl/mafft.rb +259 -0
  37. data/lib/bio/appl/mafft/report.rb +226 -0
  38. data/lib/bio/appl/muscle.rb +52 -0
  39. data/lib/bio/appl/paml/baseml.rb +95 -0
  40. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  41. data/lib/bio/appl/paml/codeml.rb +242 -0
  42. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  43. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  44. data/lib/bio/appl/paml/common.rb +348 -0
  45. data/lib/bio/appl/paml/common_report.rb +38 -0
  46. data/lib/bio/appl/paml/yn00.rb +103 -0
  47. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  48. data/lib/bio/appl/phylip/alignment.rb +133 -0
  49. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  50. data/lib/bio/appl/probcons.rb +41 -0
  51. data/lib/bio/appl/psort.rb +548 -0
  52. data/lib/bio/appl/psort/report.rb +542 -0
  53. data/lib/bio/appl/pts1.rb +263 -0
  54. data/lib/bio/appl/sim4.rb +124 -0
  55. data/lib/bio/appl/sim4/report.rb +485 -0
  56. data/lib/bio/appl/sosui/report.rb +151 -0
  57. data/lib/bio/appl/spidey/report.rb +593 -0
  58. data/lib/bio/appl/targetp/report.rb +267 -0
  59. data/lib/bio/appl/tcoffee.rb +55 -0
  60. data/lib/bio/appl/tmhmm/report.rb +231 -0
  61. data/lib/bio/command.rb +593 -0
  62. data/lib/bio/compat/features.rb +157 -0
  63. data/lib/bio/compat/references.rb +128 -0
  64. data/lib/bio/data/aa.rb +353 -0
  65. data/lib/bio/data/codontable.rb +722 -0
  66. data/lib/bio/data/na.rb +223 -0
  67. data/lib/bio/db.rb +329 -0
  68. data/lib/bio/db/aaindex.rb +357 -0
  69. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  70. data/lib/bio/db/biosql/sequence.rb +508 -0
  71. data/lib/bio/db/embl/common.rb +352 -0
  72. data/lib/bio/db/embl/embl.rb +500 -0
  73. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  74. data/lib/bio/db/embl/format_embl.rb +190 -0
  75. data/lib/bio/db/embl/sptr.rb +1283 -0
  76. data/lib/bio/db/embl/swissprot.rb +42 -0
  77. data/lib/bio/db/embl/trembl.rb +41 -0
  78. data/lib/bio/db/embl/uniprot.rb +42 -0
  79. data/lib/bio/db/fantom.rb +597 -0
  80. data/lib/bio/db/fasta.rb +410 -0
  81. data/lib/bio/db/fasta/defline.rb +532 -0
  82. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  83. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  84. data/lib/bio/db/genbank/common.rb +307 -0
  85. data/lib/bio/db/genbank/ddbj.rb +22 -0
  86. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  87. data/lib/bio/db/genbank/genbank.rb +250 -0
  88. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  89. data/lib/bio/db/genbank/genpept.rb +60 -0
  90. data/lib/bio/db/genbank/refseq.rb +18 -0
  91. data/lib/bio/db/gff.rb +1846 -0
  92. data/lib/bio/db/go.rb +481 -0
  93. data/lib/bio/db/kegg/brite.rb +41 -0
  94. data/lib/bio/db/kegg/compound.rb +131 -0
  95. data/lib/bio/db/kegg/drug.rb +98 -0
  96. data/lib/bio/db/kegg/enzyme.rb +148 -0
  97. data/lib/bio/db/kegg/expression.rb +155 -0
  98. data/lib/bio/db/kegg/genes.rb +263 -0
  99. data/lib/bio/db/kegg/genome.rb +241 -0
  100. data/lib/bio/db/kegg/glycan.rb +166 -0
  101. data/lib/bio/db/kegg/keggtab.rb +357 -0
  102. data/lib/bio/db/kegg/kgml.rb +256 -0
  103. data/lib/bio/db/kegg/orthology.rb +136 -0
  104. data/lib/bio/db/kegg/reaction.rb +82 -0
  105. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  106. data/lib/bio/db/lasergene.rb +209 -0
  107. data/lib/bio/db/litdb.rb +107 -0
  108. data/lib/bio/db/medline.rb +326 -0
  109. data/lib/bio/db/nbrf.rb +191 -0
  110. data/lib/bio/db/newick.rb +658 -0
  111. data/lib/bio/db/nexus.rb +1854 -0
  112. data/lib/bio/db/pdb.rb +29 -0
  113. data/lib/bio/db/pdb/atom.rb +77 -0
  114. data/lib/bio/db/pdb/chain.rb +210 -0
  115. data/lib/bio/db/pdb/chemicalcomponent.rb +224 -0
  116. data/lib/bio/db/pdb/model.rb +148 -0
  117. data/lib/bio/db/pdb/pdb.rb +1911 -0
  118. data/lib/bio/db/pdb/residue.rb +176 -0
  119. data/lib/bio/db/pdb/utils.rb +399 -0
  120. data/lib/bio/db/prosite.rb +597 -0
  121. data/lib/bio/db/rebase.rb +456 -0
  122. data/lib/bio/db/soft.rb +404 -0
  123. data/lib/bio/db/transfac.rb +375 -0
  124. data/lib/bio/db/url.rb +42 -0
  125. data/lib/bio/feature.rb +139 -0
  126. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  127. data/lib/bio/io/biosql/bioentry.rb +29 -0
  128. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  129. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  130. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  131. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  132. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  133. data/lib/bio/io/biosql/biosequence.rb +11 -0
  134. data/lib/bio/io/biosql/comment.rb +7 -0
  135. data/lib/bio/io/biosql/config/database.yml +20 -0
  136. data/lib/bio/io/biosql/dbxref.rb +13 -0
  137. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  138. data/lib/bio/io/biosql/location.rb +32 -0
  139. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  140. data/lib/bio/io/biosql/ontology.rb +10 -0
  141. data/lib/bio/io/biosql/reference.rb +9 -0
  142. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  143. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  144. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  145. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  146. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  147. data/lib/bio/io/biosql/taxon.rb +12 -0
  148. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  149. data/lib/bio/io/biosql/term.rb +27 -0
  150. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  151. data/lib/bio/io/biosql/term_path.rb +12 -0
  152. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  153. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  154. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  155. data/lib/bio/io/das.rb +461 -0
  156. data/lib/bio/io/dbget.rb +194 -0
  157. data/lib/bio/io/ddbjxml.rb +638 -0
  158. data/lib/bio/io/ebisoap.rb +158 -0
  159. data/lib/bio/io/ensembl.rb +229 -0
  160. data/lib/bio/io/fastacmd.rb +163 -0
  161. data/lib/bio/io/fetch.rb +195 -0
  162. data/lib/bio/io/flatfile.rb +482 -0
  163. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  164. data/lib/bio/io/flatfile/bdb.rb +253 -0
  165. data/lib/bio/io/flatfile/buffer.rb +237 -0
  166. data/lib/bio/io/flatfile/index.rb +1381 -0
  167. data/lib/bio/io/flatfile/indexer.rb +805 -0
  168. data/lib/bio/io/flatfile/splitter.rb +297 -0
  169. data/lib/bio/io/higet.rb +73 -0
  170. data/lib/bio/io/hinv.rb +442 -0
  171. data/lib/bio/io/keggapi.rb +805 -0
  172. data/lib/bio/io/ncbirest.rb +733 -0
  173. data/lib/bio/io/ncbisoap.rb +155 -0
  174. data/lib/bio/io/pubmed.rb +307 -0
  175. data/lib/bio/io/registry.rb +292 -0
  176. data/lib/bio/io/soapwsdl.rb +119 -0
  177. data/lib/bio/io/sql.rb +186 -0
  178. data/lib/bio/location.rb +867 -0
  179. data/lib/bio/map.rb +410 -0
  180. data/lib/bio/pathway.rb +960 -0
  181. data/lib/bio/reference.rb +602 -0
  182. data/lib/bio/sequence.rb +456 -0
  183. data/lib/bio/sequence/aa.rb +152 -0
  184. data/lib/bio/sequence/adapter.rb +108 -0
  185. data/lib/bio/sequence/common.rb +310 -0
  186. data/lib/bio/sequence/compat.rb +123 -0
  187. data/lib/bio/sequence/dblink.rb +54 -0
  188. data/lib/bio/sequence/format.rb +358 -0
  189. data/lib/bio/sequence/format_raw.rb +23 -0
  190. data/lib/bio/sequence/generic.rb +24 -0
  191. data/lib/bio/sequence/na.rb +491 -0
  192. data/lib/bio/shell.rb +44 -0
  193. data/lib/bio/shell/core.rb +578 -0
  194. data/lib/bio/shell/demo.rb +146 -0
  195. data/lib/bio/shell/interface.rb +218 -0
  196. data/lib/bio/shell/irb.rb +95 -0
  197. data/lib/bio/shell/object.rb +71 -0
  198. data/lib/bio/shell/plugin/blast.rb +42 -0
  199. data/lib/bio/shell/plugin/codon.rb +218 -0
  200. data/lib/bio/shell/plugin/das.rb +58 -0
  201. data/lib/bio/shell/plugin/emboss.rb +23 -0
  202. data/lib/bio/shell/plugin/entry.rb +105 -0
  203. data/lib/bio/shell/plugin/flatfile.rb +101 -0
  204. data/lib/bio/shell/plugin/keggapi.rb +181 -0
  205. data/lib/bio/shell/plugin/midi.rb +430 -0
  206. data/lib/bio/shell/plugin/obda.rb +45 -0
  207. data/lib/bio/shell/plugin/psort.rb +56 -0
  208. data/lib/bio/shell/plugin/seq.rb +247 -0
  209. data/lib/bio/shell/plugin/soap.rb +87 -0
  210. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +29 -0
  211. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +4 -0
  212. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +27 -0
  213. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +11 -0
  214. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +4 -0
  215. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +7 -0
  216. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
  217. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
  218. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
  219. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +368 -0
  220. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +47 -0
  221. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +144 -0
  222. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +47 -0
  223. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +8 -0
  224. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +10 -0
  225. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +26 -0
  226. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
  227. data/lib/bio/shell/script.rb +25 -0
  228. data/lib/bio/shell/setup.rb +109 -0
  229. data/lib/bio/shell/web.rb +102 -0
  230. data/lib/bio/tree.rb +852 -0
  231. data/lib/bio/util/color_scheme.rb +191 -0
  232. data/lib/bio/util/color_scheme/buried.rb +59 -0
  233. data/lib/bio/util/color_scheme/helix.rb +59 -0
  234. data/lib/bio/util/color_scheme/hydropathy.rb +64 -0
  235. data/lib/bio/util/color_scheme/nucleotide.rb +31 -0
  236. data/lib/bio/util/color_scheme/strand.rb +59 -0
  237. data/lib/bio/util/color_scheme/taylor.rb +50 -0
  238. data/lib/bio/util/color_scheme/turn.rb +59 -0
  239. data/lib/bio/util/color_scheme/zappo.rb +50 -0
  240. data/lib/bio/util/contingency_table.rb +370 -0
  241. data/lib/bio/util/restriction_enzyme.rb +228 -0
  242. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  243. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  244. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  245. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  246. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  247. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  248. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  249. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  250. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  251. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  252. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  253. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  254. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  255. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  256. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  257. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  258. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  259. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  260. data/lib/bio/util/restriction_enzyme/single_strand.rb +200 -0
  261. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  262. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  263. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  264. data/lib/bio/util/sirna.rb +288 -0
  265. data/test/data/HMMER/hmmpfam.out +64 -0
  266. data/test/data/HMMER/hmmsearch.out +88 -0
  267. data/test/data/SOSUI/sample.report +11 -0
  268. data/test/data/TMHMM/sample.report +21 -0
  269. data/test/data/aaindex/DAYM780301 +30 -0
  270. data/test/data/aaindex/PRAM900102 +20 -0
  271. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  272. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  273. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  274. data/test/data/blast/b0002.faa +15 -0
  275. data/test/data/blast/b0002.faa.m0 +128 -0
  276. data/test/data/blast/b0002.faa.m7 +65 -0
  277. data/test/data/blast/b0002.faa.m8 +1 -0
  278. data/test/data/blast/blastp-multi.m7 +188 -0
  279. data/test/data/command/echoarg2.bat +1 -0
  280. data/test/data/embl/AB090716.embl +65 -0
  281. data/test/data/embl/AB090716.embl.rel89 +63 -0
  282. data/test/data/fasta/example1.txt +75 -0
  283. data/test/data/fasta/example2.txt +21 -0
  284. data/test/data/genscan/sample.report +63 -0
  285. data/test/data/iprscan/merged.raw +32 -0
  286. data/test/data/iprscan/merged.txt +74 -0
  287. data/test/data/paml/codeml/control_file.txt +30 -0
  288. data/test/data/paml/codeml/output.txt +78 -0
  289. data/test/data/paml/codeml/rates +217 -0
  290. data/test/data/prosite/prosite.dat +2233 -0
  291. data/test/data/refseq/nm_126355.entret +64 -0
  292. data/test/data/rpsblast/misc.rpsblast +193 -0
  293. data/test/data/soft/GDS100_partial.soft +92 -0
  294. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  295. data/test/data/uniprot/p53_human.uniprot +1456 -0
  296. data/test/functional/bio/appl/test_pts1.rb +115 -0
  297. data/test/functional/bio/io/test_ensembl.rb +229 -0
  298. data/test/functional/bio/io/test_soapwsdl.rb +52 -0
  299. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  300. data/test/functional/bio/test_command.rb +301 -0
  301. data/test/runner.rb +14 -0
  302. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  303. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  304. data/test/unit/bio/appl/blast/test_report.rb +1135 -0
  305. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  306. data/test/unit/bio/appl/genscan/test_report.rb +182 -0
  307. data/test/unit/bio/appl/hmmer/test_report.rb +342 -0
  308. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  309. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  310. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  311. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  312. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  313. data/test/unit/bio/appl/sosui/test_report.rb +81 -0
  314. data/test/unit/bio/appl/targetp/test_report.rb +146 -0
  315. data/test/unit/bio/appl/test_blast.rb +277 -0
  316. data/test/unit/bio/appl/test_fasta.rb +130 -0
  317. data/test/unit/bio/appl/test_psort.rb +57 -0
  318. data/test/unit/bio/appl/test_pts1.rb +77 -0
  319. data/test/unit/bio/appl/tmhmm/test_report.rb +126 -0
  320. data/test/unit/bio/data/test_aa.rb +90 -0
  321. data/test/unit/bio/data/test_codontable.rb +107 -0
  322. data/test/unit/bio/data/test_na.rb +80 -0
  323. data/test/unit/bio/db/embl/test_common.rb +117 -0
  324. data/test/unit/bio/db/embl/test_embl.rb +214 -0
  325. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  326. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  327. data/test/unit/bio/db/embl/test_sptr.rb +1812 -0
  328. data/test/unit/bio/db/embl/test_uniprot.rb +31 -0
  329. data/test/unit/bio/db/kegg/test_genes.rb +45 -0
  330. data/test/unit/bio/db/pdb/test_pdb.rb +152 -0
  331. data/test/unit/bio/db/test_aaindex.rb +197 -0
  332. data/test/unit/bio/db/test_fasta.rb +250 -0
  333. data/test/unit/bio/db/test_gff.rb +1190 -0
  334. data/test/unit/bio/db/test_lasergene.rb +95 -0
  335. data/test/unit/bio/db/test_medline.rb +127 -0
  336. data/test/unit/bio/db/test_newick.rb +293 -0
  337. data/test/unit/bio/db/test_nexus.rb +364 -0
  338. data/test/unit/bio/db/test_prosite.rb +1437 -0
  339. data/test/unit/bio/db/test_rebase.rb +101 -0
  340. data/test/unit/bio/db/test_soft.rb +138 -0
  341. data/test/unit/bio/db/test_url.rb +36 -0
  342. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  343. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  344. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  345. data/test/unit/bio/io/test_ddbjxml.rb +80 -0
  346. data/test/unit/bio/io/test_ensembl.rb +109 -0
  347. data/test/unit/bio/io/test_fastacmd.rb +42 -0
  348. data/test/unit/bio/io/test_flatfile.rb +505 -0
  349. data/test/unit/bio/io/test_soapwsdl.rb +32 -0
  350. data/test/unit/bio/sequence/test_aa.rb +115 -0
  351. data/test/unit/bio/sequence/test_common.rb +373 -0
  352. data/test/unit/bio/sequence/test_compat.rb +69 -0
  353. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  354. data/test/unit/bio/sequence/test_na.rb +330 -0
  355. data/test/unit/bio/shell/plugin/test_seq.rb +185 -0
  356. data/test/unit/bio/test_alignment.rb +1025 -0
  357. data/test/unit/bio/test_command.rb +349 -0
  358. data/test/unit/bio/test_db.rb +96 -0
  359. data/test/unit/bio/test_feature.rb +144 -0
  360. data/test/unit/bio/test_location.rb +599 -0
  361. data/test/unit/bio/test_map.rb +230 -0
  362. data/test/unit/bio/test_pathway.rb +499 -0
  363. data/test/unit/bio/test_reference.rb +252 -0
  364. data/test/unit/bio/test_sequence.rb +329 -0
  365. data/test/unit/bio/test_shell.rb +18 -0
  366. data/test/unit/bio/test_tree.rb +593 -0
  367. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  368. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  369. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  370. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +101 -0
  371. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  372. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  373. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  374. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  375. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  376. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  377. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  378. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  379. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  380. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  381. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  382. data/test/unit/bio/util/test_color_scheme.rb +33 -0
  383. data/test/unit/bio/util/test_contingency_table.rb +94 -0
  384. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  385. data/test/unit/bio/util/test_sirna.rb +245 -0
  386. metadata +543 -0
@@ -0,0 +1,203 @@
1
+ #
2
+ # test/unit/bio/db/embl/test_embl_to_bioseq.rb - Unit test for Bio::EMBL to Bio::Sequence conversion
3
+ #
4
+ # Copyright:: Copyright (C) 2005, 2008
5
+ # Mitsuteru Nakao <n@bioruby.org>
6
+ # Jan Aerts <jan.aerts@bbsrc.ac.uk>
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id:$
10
+ #
11
+
12
+ require 'pathname'
13
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
14
+ $:.unshift(libpath) unless $:.include?(libpath)
15
+
16
+ require 'test/unit'
17
+ require 'bio'
18
+ require 'bio/db/embl/embl'
19
+
20
+ module Bio
21
+ class TestEMBLToBioSequence < Test::Unit::TestCase
22
+
23
+ def setup
24
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
25
+ input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
26
+ embl_object = Bio::EMBL.new(input)
27
+ embl_object.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
28
+ @bio_seq = embl_object.to_biosequence
29
+ end
30
+
31
+ def test_entry_id
32
+ assert_equal('AB090716', @bio_seq.entry_id)
33
+ end
34
+
35
+ def test_primary_accession
36
+ assert_equal('AB090716', @bio_seq.primary_accession)
37
+ end
38
+
39
+ def test_secondary_accessions
40
+ assert_equal([], @bio_seq.secondary_accessions)
41
+ end
42
+
43
+ def test_molecule_type
44
+ assert_equal('genomic DNA', @bio_seq.molecule_type)
45
+ end
46
+
47
+ def test_definition
48
+ assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq.definition)
49
+ end
50
+
51
+ def test_topology
52
+ assert_equal('linear', @bio_seq.topology)
53
+ end
54
+
55
+ def test_date_created
56
+ # '25-OCT-2002 (Rel. 73, Created)'
57
+ assert_equal(Date.parse('25-OCT-2002'), @bio_seq.date_created)
58
+ end
59
+
60
+ def test_date_modified
61
+ # '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
62
+ assert_equal(Date.parse('14-NOV-2006'), @bio_seq.date_modified)
63
+ end
64
+
65
+ def test_release_created
66
+ assert_equal('73', @bio_seq.release_created)
67
+ end
68
+
69
+ def test_release_modified
70
+ assert_equal('89', @bio_seq.release_modified)
71
+ end
72
+
73
+ def test_entry_version
74
+ assert_equal('3', @bio_seq.entry_version)
75
+ end
76
+
77
+ def test_division
78
+ assert_equal('VRT', @bio_seq.division)
79
+ end
80
+
81
+ def test_sequence_version
82
+ assert_equal(1, @bio_seq.sequence_version)
83
+ end
84
+
85
+ def test_keywords
86
+ assert_equal([], @bio_seq.keywords)
87
+ end
88
+
89
+ def test_species
90
+ assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq.species)
91
+ end
92
+
93
+ def test_classification
94
+ assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq.classification)
95
+
96
+
97
+ end
98
+
99
+ def test_references
100
+ assert_equal(2, @bio_seq.references.length)
101
+ assert_equal(Bio::Reference, @bio_seq.references[0].class)
102
+ end
103
+
104
+ def test_features
105
+ assert_equal(3, @bio_seq.features.length)
106
+ assert_equal(Bio::Feature, @bio_seq.features[0].class)
107
+ end
108
+
109
+ end
110
+
111
+ # To really test the Bio::EMBL to Bio::Sequence conversion, we need to test
112
+ # whether that Bio::Sequence can be converted back into a valid Bio::EMBL.
113
+ class TestEMBLToBioSequenceRoundTrip < Test::Unit::TestCase
114
+ def setup
115
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
116
+ input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
117
+ embl_object_1 = Bio::EMBL.new(input)
118
+ embl_object_1.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
119
+ @bio_seq_1 = embl_object_1.to_biosequence
120
+ embl_object_2 = Bio::EMBL.new(@bio_seq_1.output(:embl))
121
+ @bio_seq_2 = embl_object_2.to_biosequence
122
+ end
123
+
124
+ def test_entry_id
125
+ assert_equal('AB090716', @bio_seq_2.entry_id)
126
+ end
127
+
128
+ def test_primary_accession
129
+ assert_equal('AB090716', @bio_seq_2.primary_accession)
130
+ end
131
+
132
+ def test_secondary_accessions
133
+ assert_equal([], @bio_seq_2.secondary_accessions)
134
+ end
135
+
136
+ def test_molecule_type
137
+ assert_equal('genomic DNA', @bio_seq_2.molecule_type)
138
+ end
139
+
140
+ def test_definition
141
+ assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq_2.definition)
142
+ end
143
+
144
+ def test_topology
145
+ assert_equal('linear', @bio_seq_2.topology)
146
+ end
147
+
148
+ def test_date_created
149
+ # '25-OCT-2002 (Rel. 73, Created)'
150
+ assert_equal(Date.parse('25-OCT-2002'), @bio_seq_2.date_created)
151
+ end
152
+
153
+ def test_date_modified
154
+ # '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
155
+ assert_equal(Date.parse('14-NOV-2006'), @bio_seq_2.date_modified)
156
+ end
157
+
158
+ def test_release_created
159
+ assert_equal('73', @bio_seq_2.release_created)
160
+ end
161
+
162
+ def test_release_modified
163
+ assert_equal('89', @bio_seq_2.release_modified)
164
+ end
165
+
166
+ def test_entry_version
167
+ assert_equal('3', @bio_seq_2.entry_version)
168
+ end
169
+
170
+ def test_division
171
+ assert_equal('VRT', @bio_seq_2.division)
172
+ end
173
+
174
+ def test_sequence_version
175
+ assert_equal(1, @bio_seq_2.sequence_version)
176
+ end
177
+
178
+ def test_keywords
179
+ assert_equal([], @bio_seq_2.keywords)
180
+ end
181
+
182
+ def test_species
183
+ assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq_2.species)
184
+ end
185
+
186
+ def test_classification
187
+ assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq_2.classification)
188
+
189
+
190
+ end
191
+
192
+ def test_references
193
+ assert_equal(2, @bio_seq_2.references.length)
194
+ assert_equal(Bio::Reference, @bio_seq_2.references[0].class)
195
+ end
196
+
197
+ def test_features
198
+ assert_equal(3, @bio_seq_2.features.length)
199
+ assert_equal(Bio::Feature, @bio_seq_2.features[0].class)
200
+ end
201
+ end
202
+ end
203
+
@@ -0,0 +1,1812 @@
1
+ #
2
+ # test/unit/bio/db/embl/test_sptr.rb - Unit test for Bio::SPTR
3
+ #
4
+ # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+
10
+ require 'pathname'
11
+ libpath = Pathname.new(File.join(File.dirname(__FILE__),
12
+ ['..'] * 5, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'bio/db/embl/sptr'
17
+
18
+ module Bio
19
+ class TestSPTR < Test::Unit::TestCase
20
+
21
+ def setup
22
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__),
23
+ ['..'] * 5)).cleanpath.to_s
24
+ data = File.open(File.join(bioruby_root,
25
+ 'test', 'data', 'uniprot',
26
+ 'p53_human.uniprot')).read
27
+ @obj = Bio::SPTR.new(data)
28
+ end
29
+
30
+ def test_id_line
31
+ assert(@obj.id_line)
32
+ end
33
+
34
+ def test_id_line_entry_name
35
+ assert_equal('P53_HUMAN', @obj.id_line('ENTRY_NAME'))
36
+ end
37
+
38
+ def test_id_line_data_class
39
+ assert_equal('STANDARD', @obj.id_line('DATA_CLASS'))
40
+ end
41
+
42
+ def test_id_line_molecule_type
43
+ assert_equal('PRT', @obj.id_line('MOLECULE_TYPE'))
44
+ end
45
+
46
+ def test_id_line_sequence_length
47
+ assert_equal(393, @obj.id_line('SEQUENCE_LENGTH'))
48
+ end
49
+
50
+ def test_entry
51
+ entry = 'P53_HUMAN'
52
+ assert_equal(entry, @obj.entry)
53
+ assert_equal(entry, @obj.entry_name)
54
+ assert_equal(entry, @obj.entry_id)
55
+ end
56
+
57
+ def test_molecule
58
+ assert_equal('PRT', @obj.molecule)
59
+ assert_equal('PRT', @obj.molecule_type)
60
+ end
61
+
62
+ def test_sequence_length
63
+ seqlen = 393
64
+ assert_equal(seqlen, @obj.sequence_length)
65
+ assert_equal(seqlen, @obj.aalen)
66
+ end
67
+
68
+ def test_ac
69
+ acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807",
70
+ "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q86UG1",
71
+ "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2",
72
+ "Q9NZD0", "Q9UBI2", "Q9UQ61"]
73
+ assert_equal(acs, @obj.ac)
74
+ assert_equal(acs, @obj.accessions)
75
+ end
76
+
77
+ def test_accession
78
+ assert_equal('P04637', @obj.accession)
79
+ end
80
+
81
+ def test_dr
82
+ assert_equal(17, @obj.dr.size)
83
+ assert_equal(27, @obj.dr['GO'].size)
84
+ assert_equal([["IPR002117", "P53"],
85
+ ["IPR011615", "P53_DNA_bd"],
86
+ ["IPR012346", "P53_RUNT_DNA_bd"],
87
+ ["IPR010991", "p53_tetrameristn"]],
88
+ @obj.dr['InterPro'])
89
+ end
90
+
91
+ def test_dr_with_key
92
+ pfam = [
93
+ { " " => "1",
94
+ "Version" => "P53",
95
+ "Accession" => "PF00870",
96
+ "Molecular Type" => nil
97
+ },
98
+ { " " => "1",
99
+ "Version" => "P53_tetramer",
100
+ "Accession" => "PF07710",
101
+ "Molecular Type" => nil
102
+ }
103
+ ]
104
+ assert_equal(pfam, @obj.dr('Pfam'))
105
+ embl3 = {
106
+ " " => "JOINED",
107
+ "Version" => "AAA59987.1",
108
+ "Accession" => "M13113",
109
+ "Molecular Type" => "Genomic_DNA"
110
+ }
111
+ assert_equal(embl3, @obj.dr('EMBL')[3])
112
+ end
113
+
114
+ def test_dr_with_key_empty
115
+ assert_equal([], @obj.dr('NOT_A_DATABASE'))
116
+ end
117
+
118
+ def test_dt
119
+ assert(@obj.dt)
120
+ end
121
+
122
+ def test_dt_created
123
+ assert_equal('13-AUG-1987 (Rel. 05, Created)', @obj.dt('created'))
124
+ end
125
+
126
+ def test_dt_sequence
127
+ assert_equal('01-MAR-1989 (Rel. 10, Last sequence update)',
128
+ @obj.dt('sequence'))
129
+ end
130
+
131
+ def test_dt_annotation
132
+ assert_equal('13-SEP-2005 (Rel. 48, Last annotation update)',
133
+ @obj.dt('annotation'))
134
+ end
135
+
136
+ def test_de
137
+ assert(@obj.de)
138
+ end
139
+
140
+ def test_protein_name
141
+ assert_equal("Cellular tumor antigen p53", @obj.protein_name)
142
+ end
143
+
144
+ def test_synonyms
145
+ ary = ["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"]
146
+ assert_equal(ary, @obj.synonyms)
147
+ end
148
+
149
+ def test_gn
150
+ assert_equal([{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}],
151
+ @obj.gn)
152
+ end
153
+
154
# Checks the result of the gn_uniprot_parser helper on the fixture entry.
def test_gn_uniprot_parser
  expected = [{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}]
  # The helper is called through #send, which works whether or not it is
  # private, instead of evaluating a string of Ruby source. The unused
  # +gn_uniprot_data+ local was dropped.
  assert_equal(expected, @obj.send(:gn_uniprot_parser))
end
159
+
160
# Checks the result of the gn_old_parser helper on the fixture entry.
def test_gn_old_parser
  expected = [["Name=TP53; Synonyms=P53;"]]
  # The helper is called through #send, which works whether or not it is
  # private, instead of evaluating a string of Ruby source. The unused
  # +gn_old_data+ local was dropped.
  assert_equal(expected, @obj.send(:gn_old_parser))
end
165
+
166
# Checks the array returned by #gene_names.
def test_gene_names
  expected = ["TP53"]
  assert_equal(expected, @obj.gene_names)
end
169
+
170
# Checks the single string returned by #gene_name.
def test_gene_name
  expected = 'TP53'
  assert_equal(expected, @obj.gene_name)
end
173
+
174
# Checks that #os without arguments returns a truthy value.
def test_os
  organisms = @obj.os
  assert(organisms)
end
177
+
178
# Checks that #os called with an index returns a formatted string.
def test_os_access
  expected = "Homo sapiens (Human)"
  assert_equal(expected, @obj.os(0))
end
181
+
182
# Checks that indexing the result of #os yields a hash with the "os" and
# "name" keys.
def test_os_access2
  expected = { "os" => "Homo sapiens", "name" => "(Human)" }
  assert_equal(expected, @obj.os[0])
end
185
+
186
# Checks that #og turns an OG line naming two organelles separated by ';'
# into a two-element array.
def test_og_1
  og = "OG Plastid; Chloroplast."
  ary = ['Plastid', 'Chloroplast']
  # Inject the fixture with a block instead of an eval'd string, so the OG
  # text is never re-parsed as Ruby source (a quote or backslash in the
  # fixture would otherwise corrupt or break the assignment).
  @obj.instance_eval { @orig['OG'] = og }
  assert_equal(ary, @obj.og)
end
192
+
193
# Checks that #og turns an OG line naming a single organelle into a
# one-element array.
def test_og_2
  og = "OG Mitochondrion."
  ary = ['Mitochondrion']
  # Inject the fixture with a block instead of an eval'd string, so the OG
  # text is never re-parsed as Ruby source (a quote or backslash in the
  # fixture would otherwise corrupt or break the assignment).
  @obj.instance_eval { @orig['OG'] = og }
  assert_equal(ary, @obj.og)
end
199
+
200
# Checks that #og keeps a multi-word plasmid name as a single element.
def test_og_3
  og = "OG Plasmid sym pNGR234a."
  ary = ["Plasmid sym pNGR234a"]
  # Inject the fixture with a block instead of an eval'd string, so the OG
  # text is never re-parsed as Ruby source (a quote or backslash in the
  # fixture would otherwise corrupt or break the assignment).
  @obj.instance_eval { @orig['OG'] = og }
  assert_equal(ary, @obj.og)
end
206
+
207
# Checks that #og splits "Plastid; Cyanelle." into two elements.
def test_og_4
  og = "OG Plastid; Cyanelle."
  ary = ['Plastid', 'Cyanelle']
  # Inject the fixture with a block instead of an eval'd string, so the OG
  # text is never re-parsed as Ruby source (a quote or backslash in the
  # fixture would otherwise corrupt or break the assignment).
  @obj.instance_eval { @orig['OG'] = og }
  assert_equal(ary, @obj.og)
end
213
+
214
# Checks that #og keeps a plasmid name with a parenthesised remark as a
# single element.
def test_og_5
  og = "OG Plasmid pSymA (megaplasmid 1)."
  ary = ["Plasmid pSymA (megaplasmid 1)"]
  # Inject the fixture with a block instead of an eval'd string, so the OG
  # text is never re-parsed as Ruby source (a quote or backslash in the
  # fixture would otherwise corrupt or break the assignment).
  @obj.instance_eval { @orig['OG'] = og }
  assert_equal(ary, @obj.og)
end
220
+
221
# Checks that #og splits a comma-separated plasmid list ending in
# ", and ..." into one element per plasmid.
def test_og_6
  og = "OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1."
  ary = ['Plasmid pNRC100', 'Plasmid pNRC200', 'Plasmid pHH1']
  # Inject the fixture with a block instead of an eval'd string, so the OG
  # text is never re-parsed as Ruby source (a quote or backslash in the
  # fixture would otherwise corrupt or break the assignment).
  @obj.instance_eval { @orig['OG'] = og }
  assert_equal(ary, @obj.og)
end
227
+
228
# Checks the ordered list of names returned by #oc.
def test_oc
  lineage = %w(
    Eukaryota Metazoa Chordata Craniata
    Vertebrata Euteleostomi Mammalia Eutheria
    Euarchontoglires Primates Catarrhini Hominidae
    Homo
  )
  assert_equal(lineage, @obj.oc)
end
235
+
236
# Checks the hash returned by #ox (database name => list of ids).
def test_ox
  expected = { "NCBI_TaxID" => ["9606"] }
  assert_equal(expected, @obj.ox)
end
239
+
240
# Bio::SPTR#ref: checks only that the result is an Array.
def test_ref
  assert_instance_of(Array, @obj.ref)
end
243
+
244
# Checks only that #cc without arguments returns a Hash.
def test_cc
  assert_instance_of(Hash, @obj.cc)
end
247
+
248
# Checks the list of hashes returned by #cc for the 'DATABASE' topic; each
# hash carries the "NAME", "WWW", "FTP" and "NOTE" keys.
def test_cc_database
  iarc = {
    "NAME" => "IARC TP53 mutation database",
    "WWW" => "http://www.iarc.fr/p53/",
    "FTP" => nil,
    "NOTE" => "IARC db of somatic p53 mutations"
  }
  tokyo = {
    "NAME" => "Tokyo p53",
    "WWW" => "http://p53.genome.ad.jp/",
    "FTP" => nil,
    "NOTE" => "University of Tokyo db of p53 mutations"
  }
  curie = {
    "NAME" => "p53 web site at the Institut Curie",
    "WWW" => "http://p53.curie.fr/",
    "FTP" => nil,
    "NOTE" => nil
  }
  atlas = {
    "NAME" => "Atlas Genet. Cytogenet. Oncol. Haematol.",
    "WWW" => "http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html",
    "FTP" => nil,
    "NOTE" => nil
  }
  assert_equal([iarc, tokyo, curie, atlas], @obj.cc('DATABASE'))
end
262
+
263
# Checks the structured hash returned by #cc for the
# 'ALTERNATIVE PRODUCTS' topic of the fixture entry.
def test_cc_alternative_products
  isoform1 = {
    "Name" => "1",
    "IsoId" => ["P04637-1"],
    "Synonyms" => [],
    "Sequence" => ["Displayed"]
  }
  isoform2 = {
    "Name" => "2",
    "IsoId" => ["P04637-2"],
    "Synonyms" => ["I9RET"],
    "Sequence" => ["VSP_006535", "VSP_006536"]
  }
  expected = {
    "Event" => ["Alternative splicing"],
    "Named isoforms" => "2",
    "Comment" => "",
    "Variants" => [isoform1, isoform2]
  }
  assert_equal(expected, @obj.cc('ALTERNATIVE PRODUCTS'))
end
277
+
278
# Checks that #cc returns nil for the 'MASS SPECTROMETRY' topic on the
# fixture entry.
def test_cc_mass_spectrometry
  # assert_nil states the intent directly and reports a clearer failure
  # than comparing against nil with assert_equal.
  assert_nil(@obj.cc('MASS SPECTROMETRY'))
end
281
+
282
+
283
# Checks the ordered keyword list returned by #kw.
def test_kw
  expected = [
    "3D-structure",
    "Acetylation",
    "Activator",
    "Alternative splicing",
    "Anti-oncogene",
    "Apoptosis",
    "Cell cycle",
    "Disease mutation",
    "DNA-binding",
    "Glycoprotein",
    "Li-Fraumeni syndrome",
    "Metal-binding",
    "Nuclear protein",
    "Phosphorylation",
    "Polymorphism",
    "Transcription",
    "Transcription regulation",
    "Zinc"
  ]
  assert_equal(expected, @obj.kw)
end
292
+
293
# Checks that #ft is truthy and that its 'DNA_BIND' entry is a
# one-element list describing the 102..292 feature.
def test_ft
  assert(@obj.ft)

  dna_bind = {
    "From" => 102,
    "To" => 292,
    "Description" => "",
    "FTId" => "",
    "diff" => [],
    "original" => ['DNA_BIND', '102', '292', '', '']
  }
  assert_equal([dna_bind], @obj.ft['DNA_BIND'])
end
301
+
302
# Checks the hash returned by #sq without arguments.
def test_sq
  expected = {
    "aalen" => 393,
    "MW" => 43653,
    "CRC64" => "AD5C149FD8106131"
  }
  assert_equal(expected, @obj.sq)
end
306
+
307
# Checks the value returned by #sq for the 'CRC64' key.
def test_sq_crc64
  checksum = "AD5C149FD8106131"
  assert_equal(checksum, @obj.sq('CRC64'))
end
310
+
311
# Checks that #sq returns the same value, 43653, for each of the keys
# 'mw', 'molecular' and 'weight'.
def test_sq_mw
  %w(mw molecular weight).each do |key|
    assert_equal(43653, @obj.sq(key))
  end
end
317
+
318
# Checks that #sq returns the same value, 393, for each of the keys
# 'len', 'length' and 'AA'.
def test_sq_len
  %w(len length AA).each do |key|
    assert_equal(393, @obj.sq(key))
  end
end
324
+
325
# Checks that #seq and #aaseq both return the full residue string.
def test_seq
  residues = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD'
  actual_seq = @obj.seq
  assert_equal(residues, actual_seq)
  actual_aaseq = @obj.aaseq
  assert_equal(residues, actual_aaseq)
end
330
+
331
+ end # class TestSPTR
332
+
333
+
334
+
335
+ class TestSPTRCC < Test::Unit::TestCase
336
# ALLERGEN: information relevant to allergenic proteins.
# Checks that both cc['ALLERGEN'] and cc('ALLERGEN') return the comment
# text wrapped in a one-element array.
def test_allergen
  text = 'Causes an allergic reaction in human.'
  entry = Bio::SPTR.new('CC -!- ALLERGEN: Causes an allergic reaction in human.')
  assert_equal([text], entry.cc['ALLERGEN'])
  assert_equal([text], entry.cc('ALLERGEN'))
end
345
+
346
# Checks hash-style access, cc['ALTERNATIVE PRODUCTS']: the continuation
# lines are expected to come back joined into one string.
def test_alternative_products_access_as_hash
  lines = [
    "CC -!- ALTERNATIVE PRODUCTS:",
    "CC Event=Alternative initiation; Named isoforms=2;",
    "CC Name=Long;",
    "CC IsoId=P68250-1; Sequence=Displayed;",
    "CC Name=Short;",
    "CC IsoId=P68250-2; Sequence=VSP_018631;",
    "CC Note=Contains a N-acetylmethionine at position 1 (By",
    "CC similarity);"
  ]
  entry = Bio::SPTR.new(lines.join("\n"))

  joined = "Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"
  assert_equal([joined], entry.cc['ALTERNATIVE PRODUCTS'])
end
361
+
362
# ALTERNATIVE PRODUCTS: related protein sequences produced by alternative
# splicing, alternative promoter usage, ribosomal frameshifting or
# alternative initiation codons (see 3.21.15).
# This case covers Event=Alternative initiation.
def test_alternative_products_ai
  lines = [
    "CC -!- ALTERNATIVE PRODUCTS:",
    "CC Event=Alternative initiation; Named isoforms=2;",
    "CC Name=Long;",
    "CC IsoId=P68250-1; Sequence=Displayed;",
    "CC Name=Short;",
    "CC IsoId=P68250-2; Sequence=VSP_018631;",
    "CC Note=Contains a N-acetylmethionine at position 1 (By",
    "CC similarity);"
  ]
  entry = Bio::SPTR.new(lines.join("\n"))

  long_form = {
    "Name" => "Long",
    "IsoId" => ["P68250-1"],
    "Synonyms" => [],
    "Sequence" => ["Displayed"]
  }
  short_form = {
    "Name" => "Short",
    "IsoId" => ["P68250-2"],
    "Synonyms" => [],
    "Sequence" => ["VSP_018631"]
  }
  expected = {
    "Event" => ["Alternative initiation"],
    "Named isoforms" => "2",
    "Comment" => "",
    "Variants" => [long_form, short_form]
  }
  assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
end
389
# ALTERNATIVE PRODUCTS with Event=Alternative splicing, where one isoform
# has a synonym and two sequence identifiers.
def test_alternative_products_as
  lines = [
    "CC -!- ALTERNATIVE PRODUCTS:",
    "CC Event=Alternative splicing; Named isoforms=2;",
    "CC Name=1;",
    "CC IsoId=P04637-1; Sequence=Displayed;",
    "CC Name=2; Synonyms=I9RET;",
    "CC IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;",
    "CC Note=Seems to be non-functional. Expressed in quiescent",
    "CC lymphocytes;"
  ]
  entry = Bio::SPTR.new(lines.join("\n"))

  first = {
    "Name" => "1",
    "IsoId" => ["P04637-1"],
    "Synonyms" => [],
    "Sequence" => ["Displayed"]
  }
  second = {
    "Name" => "2",
    "IsoId" => ["P04637-2"],
    "Synonyms" => ["I9RET"],
    "Sequence" => ["VSP_006535", "VSP_006536"]
  }
  expected = {
    "Event" => ["Alternative splicing"],
    "Named isoforms" => "2",
    "Comment" => "",
    "Variants" => [first, second]
  }
  assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
end
413
# ALTERNATIVE PRODUCTS with two events (alternative promoter usage and
# alternative splicing), a Comment field and five named isoforms.
def test_alternative_products_apu
  lines = [
    "CC -!- ALTERNATIVE PRODUCTS:",
    "CC Event=Alternative promoter usage, Alternative splicing; Named isoforms=5;",
    "CC Comment=Additional isoforms (AAT-1L and AAT-1S) may exist;",
    "CC Name=1; Synonyms=AAT-1M;",
    "CC IsoId=Q7Z4T9-1; Sequence=Displayed;",
    "CC Name=2;",
    "CC IsoId=Q7Z4T9-2; Sequence=VSP_014910, VSP_014911;",
    "CC Note=No experimental confirmation available;",
    "CC Name=3;",
    "CC IsoId=Q7Z4T9-3; Sequence=VSP_014907, VSP_014912;",
    "CC Name=4; Synonyms=AAT1-alpha;",
    "CC IsoId=Q7Z4T9-4; Sequence=VSP_014908;",
    "CC Note=May be produced by alternative promoter usage;",
    "CC Name=5; Synonyms=AAT1-beta, AAT1-gamma;",
    "CC IsoId=Q7Z4T9-5; Sequence=VSP_014909;",
    "CC Note=May be produced by alternative promoter usage;"
  ]
  entry = Bio::SPTR.new(lines.join("\n"))

  variants = [
    { "Name" => "1",
      "IsoId" => ["Q7Z4T9-1"],
      "Synonyms" => ["AAT-1M"],
      "Sequence" => ["Displayed"] },
    { "Name" => "2",
      "IsoId" => ["Q7Z4T9-2"],
      "Synonyms" => [],
      "Sequence" => ["VSP_014910", "VSP_014911"] },
    { "Name" => "3",
      "IsoId" => ["Q7Z4T9-3"],
      "Synonyms" => [],
      "Sequence" => ["VSP_014907", "VSP_014912"] },
    { "Name" => "4",
      "IsoId" => ["Q7Z4T9-4"],
      "Synonyms" => ["AAT1-alpha"],
      "Sequence" => ["VSP_014908"] },
    { "Name" => "5",
      "IsoId" => ["Q7Z4T9-5"],
      "Synonyms" => ["AAT1-beta", "AAT1-gamma"],
      "Sequence" => ["VSP_014909"] }
  ]
  expected = {
    "Event" => ["Alternative promoter usage", "Alternative splicing"],
    "Named isoforms" => "5",
    "Comment" => "Additional isoforms (AAT-1L and AAT-1S) may exist",
    "Variants" => variants
  }
  assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
end
457
# Checks that an entry built from an empty string yields an empty hash
# for the 'ALTERNATIVE PRODUCTS' topic.
def test_alternative_products_rf
  entry = Bio::SPTR.new("")
  nothing = {}
  assert_equal(nothing, entry.cc('ALTERNATIVE PRODUCTS'))
end
463
+
464
# BIOPHYSICOCHEMICAL PROPERTIES: biophysical and physicochemical data such
# as pH dependence, temperature dependence, kinetic parameters, redox
# potentials and maximal absorption (see 3.21.8).
# Two fixtures are parsed: a partial real-world comment, then the full
# syntax template from section 3.12.2.
def test_biophysicochemical_properties
  topic = 'BIOPHYSICOCHEMICAL PROPERTIES'

  partial = [
    'CC -!- BIOPHYSICOCHEMICAL PROPERTIES:',
    'CC Kinetic parameters:',
    'CC KM=45 uM for AdoMet;',
    'CC Vmax=32 uM/h/mg enzyme;',
    'CC pH dependence:',
    'CC Optimum pH is 8.2;'
  ].join("\n")
  entry = Bio::SPTR.new(partial)
  raw = ["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"]
  assert_equal(raw, entry.cc[topic])
  # Sections missing from the input are expected as "" or {}.
  parsed = {
    "Absorption" => {},
    "Kinetic parameters" => { "KM" => "45 uM for AdoMet",
                              "Vmax" => "32 uM/h/mg enzyme" },
    "pH dependence" => "Optimum pH is 8.2",
    "Redox potential" => "",
    "Temperature dependence" => ""
  }
  assert_equal(parsed, entry.cc(topic))

  # 3.12.2. Syntax of the topic 'BIOPHYSICOCHEMICAL PROPERTIES'
  template = [
    "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
    "CC Absorption:",
    "CC Abs(max)=xx nm;",
    "CC Note=free_text;",
    "CC Kinetic parameters:",
    "CC KM=xx unit for substrate [(free_text)];",
    "CC Vmax=xx unit enzyme [free_text];",
    "CC Note=free_text;",
    "CC pH dependence:",
    "CC free_text;",
    "CC Redox potential:",
    "CC free_text;",
    "CC Temperature dependence:",
    "CC free_text;"
  ].join("\n")
  entry = Bio::SPTR.new(template)
  parsed = {
    "Absorption" => { "Abs(max)" => "xx nm", "Note" => "free_text" },
    "Kinetic parameters" => {
      "KM" => "xx unit for substrate [(free_text)]",
      "Vmax" => "xx unit enzyme [free_text]",
      "Note" => "free_text"
    },
    "pH dependence" => "free_text",
    "Redox potential" => "free_text",
    "Temperature dependence" => "free_text"
  }
  assert_equal(parsed, entry.cc(topic))
end
510
+
511
+
512
# BIOTECHNOLOGY: use of a specific protein in a biotechnological process.
# Checks that the wrapped comment lines come back as one joined string.
def test_biotechnology
  lines = [
    'CC -!- BIOTECHNOLOGY: Introduced by genetic manipulation and expressed in',
    'CC improved ripening tomato by Monsanto. ACC is the immediate',
    'CC precursor of the phytohormone ethylene which is involved in the',
    'CC control of ripening. ACC deaminase reduces ethylene biosynthesis',
    'CC and thus extends the shelf life of fruits and vegetables.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."
  assert_equal([text], entry.cc['BIOTECHNOLOGY'])
end
523
+
524
# CATALYTIC ACTIVITY: reaction(s) catalyzed by an enzyme [1].
# The fixture breaks lines after hyphens; the expected text has the
# hyphenated words rejoined without a space.
def test_catalytic_activity
  lines = [
    'CC -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3-',
    'CC methyladenine, 3-methylguanine, 7-methylguanine and 7-',
    'CC methyladenine.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."
  assert_equal([text], entry.cc['CATALYTIC ACTIVITY'])
end
533
+
534
# CAUTION: warning about possible errors and/or grounds for confusion.
# cc['CAUTION'] is expected as a one-element array, cc('CAUTION') as the
# bare string.
def test_caution
  lines = [
    'CC -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon',
    'CC in position 480 which was translated as a stop codon to shorten',
    'CC the sequence.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."
  assert_equal([text], entry.cc['CAUTION'])
  assert_equal(text, entry.cc('CAUTION'))
end
546
+
547
# COFACTOR: non-protein substance required by an enzyme for its catalytic
# activity. Two COFACTOR comments are expected as a two-element array from
# both access styles.
def test_cofactor
  lines = [
    'CC -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity.',
    'CC -!- COFACTOR: Mg(2+).'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  cofactors = [
    "Cl(-). Is unique in requiring Cl(-) for its activity.",
    "Mg(2+)."
  ]
  assert_equal(cofactors, entry.cc['COFACTOR'])
  assert_equal(cofactors, entry.cc('COFACTOR'))
end
560
+
561
# DEVELOPMENTAL STAGE: developmentally-specific expression of mRNA or
# protein. The hash-style access is expected as a one-element array, the
# call-style access as the bare string.
def test_developmental_stage
  lines = [
    'CC -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35',
    'CC with higher levels detected at day 56. Isoform 1 is not detected',
    'CC in males of any age.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."
  assert_equal([text], entry.cc['DEVELOPMENTAL STAGE'])
  assert_equal(text, entry.cc('DEVELOPMENTAL STAGE'))
end
572
+
573
# DISEASE: disease(s) associated with a deficiency of a protein.
# The hash-style access is expected as a one-element array, the
# call-style access as the bare string.
def test_disease
  lines = [
    'CC -!- DISEASE: Defects in APP are a cause of hereditary cerebral',
    'CC hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This',
    'CC disorder is characterized by amyloid deposits in cerebral vessels.',
    'CC The principal clinical characteristics are recurring cerebral',
    'CC hemorrhages, sometimes preceded by migrainous headaches or mental',
    'CC cleavage. Various types of HCHWAD are known. They differ in onset',
    'CC and aggressiveness of the disease. The Iowa type demonstrated no',
    'CC cerebral hemorrhaging but is characterized by progressive',
    'CC cognitive decline. Beta-APP40 is the predominant form of',
    'CC cerebrovascular amyloid.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."
  assert_equal([text], entry.cc['DISEASE'])
  assert_equal(text, entry.cc('DISEASE'))
end
591
+
592
# DOMAIN: domain structure of a protein. Two DOMAIN comments are expected
# as a two-element array from both access styles.
def test_domain
  lines = [
    'CC -!- DOMAIN: The basolateral sorting signal (BaSS) is required for',
    'CC sorting of membrane proteins to the basolateral surface of',
    'CC epithelial cells.',
    'CC -!- DOMAIN: The NPXY sequence motif found in many tyrosine-',
    'CC phosphorylated proteins is required for the specific binding of',
    'CC the PID domain. However, additional amino acids either N- or C-',
    'CC terminal to the NPXY motif are often required for complete',
    'CC interaction. The PID domain-containing proteins which bind APP',
    'CC require the YENPTY motif for full interaction. These interactions',
    'CC are independent of phosphorylation on the terminal tyrosine',
    'CC residue. The NPXY site is also involved in clathrin-mediated',
    'CC endocytosis (By similarity).'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  domains = [
    "The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.",
    "The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."
  ]
  assert_equal(domains, entry.cc['DOMAIN'])
  assert_equal(domains, entry.cc('DOMAIN'))
end
614
+
615
# ENZYME REGULATION: an enzyme regulatory mechanism.
# The hash-style access is expected as a one-element array, the
# call-style access as the bare string.
def test_enzyme_regulation
  lines = [
    'CC -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated',
    'CC by the G protein beta and gamma subunit complex.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."
  assert_equal([text], entry.cc['ENZYME REGULATION'])
  assert_equal(text, entry.cc('ENZYME REGULATION'))
end
625
+
626
# FUNCTION: general description of the function(s) of a protein.
# The hash-style access is expected as a one-element array, the
# call-style access as the bare string.
def test_function
  lines = [
    'CC -!- FUNCTION: May play a fundamental role in situations where fine',
    'CC interplay between intracellular calcium and cAMP determines the',
    'CC cellular function. May be a physiologically relevant docking site',
    'CC for calcineurin (By similarity).'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."
  assert_equal([text], entry.cc['FUNCTION'])
  assert_equal(text, entry.cc('FUNCTION'))
end
638
+
639
# INDUCTION: compound(s) or condition(s) that regulate gene expression.
# The hash-style access is expected as a one-element array, the
# call-style access as the bare string.
def test_induction
  entry = Bio::SPTR.new('CC -!- INDUCTION: By pheromone (alpha-factor).')
  text = "By pheromone (alpha-factor)."
  assert_equal([text], entry.cc['INDUCTION'])
  assert_equal(text, entry.cc('INDUCTION'))
end
648
+
649
# INTERACTION: binary protein-protein interaction data (3.21.12).
# Checks the raw joined text and the structured form, one hash per
# interaction line.
def test_interaction
  lines = [
    'CC -!- INTERACTION:',
    'CC P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435;',
    'CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))

  raw = ["P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"]
  assert_equal(raw, entry.cc['INTERACTION'])

  partners = [
    { 'SP_Ac' => 'P62158',
      'identifier' => 'CALM1',
      'optional_identifier' => '(xeno)',
      'NbExp' => '1',
      'IntAct' => ['EBI-457011', 'EBI-397435'] },
    { 'SP_Ac' => 'P62155',
      'identifier' => 'calm1',
      'optional_identifier' => '(xeno)',
      'NbExp' => '1',
      'IntAct' => ['EBI-457011', 'EBI-397568'] }
  ]
  assert_equal(partners, entry.cc('INTERACTION'))
end
669
+
670
# MASS SPECTROMETRY: exact molecular weight of a protein, or part of one,
# as determined by mass spectrometric methods (see 3.21.23).
# The second comment has no MW_ERR field; nil is expected for it.
def test_mass_spectrometry
  lines = [
    "CC -!- MASS SPECTROMETRY: MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29;",
    "CC NOTE=Ref.1.",
    "CC -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29;",
    "CC NOTE=Ref.2."
  ]
  entry = Bio::SPTR.new(lines.join("\n"))

  raw = [
    "MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.",
    "MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."
  ]
  assert_equal(raw, entry.cc['MASS SPECTROMETRY'])

  measurements = [
    { 'MW' => '2894.9',
      'MW_ERR' => '3',
      'METHOD' => 'MALDI',
      'RANGE' => '1-29',
      'NOTE' => 'Ref.1' },
    { 'MW' => '2892.2',
      'MW_ERR' => nil,
      'METHOD' => 'Electrospray',
      'RANGE' => '1-29',
      'NOTE' => 'Ref.2' }
  ]
  assert_equal(measurements, entry.cc('MASS SPECTROMETRY'))
end
692
+
693
# MISCELLANEOUS: any comment which does not belong to one of the other
# defined topics.
def test_miscellaneous
  lines = [
    'CC -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a',
    'CC mitochondrial one.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "There are two isozymes; a cytoplasmic one and a mitochondrial one."
  assert_equal([text], entry.cc['MISCELLANEOUS'])
end
701
+
702
# PATHWAY: metabolic pathway(s) with which a protein is associated.
# Checks the raw joined text and the structured list of five elements.
def test_pathway
  lines = [
    'CC -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-',
    'CC phosphate and glycerone phosphate from D-glucose: step 4.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))

  raw = ["Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."]
  assert_equal(raw, entry.cc['PATHWAY'])

  steps = [
    "Carbohydrate degradation",
    'glycolysis',
    'D-glyceraldehyde 3-phosphate',
    'glycerone phosphate from D-glucose',
    'step 4'
  ]
  assert_equal(steps, entry.cc('PATHWAY'))
end
716
+
717
# PHARMACEUTICAL: use of a protein as a pharmaceutical drug.
def test_pharmaceutical
  lines = [
    'CC -!- PHARMACEUTICAL: Available under the names Factrel (Ayerst Labs),',
    'CC Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm',
    'CC (Serono). Used in evaluating hypothalamic-pituitary gonadotropic',
    'CC function.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  text = "Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). Used in evaluating hypothalamic-pituitary gonadotropic function."
  assert_equal([text], entry.cc['PHARMACEUTICAL'])
end
727
+
728
# POLYMORPHISM: description of polymorphism(s). Two POLYMORPHISM comments
# are expected as a two-element array.
def test_polymorphism
  lines = [
    'CC -!- POLYMORPHISM: Position 161 is associated with platelet-specific',
    'CC alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161.',
    'CC Siba is involved in neonatal alloimmune thrombocytopenia (NATP).',
    'CC -!- POLYMORPHISM: Polymorphisms arise from a variable number of tandem',
    'CC 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-',
    'CC like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown',
    'CC here) contains one repeat starting at position 415, allele C',
    'CC contains two repeats, allele B contains three repeats and allele A',
    'CC contains four repeats.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  comments = [
    "Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP).",
    "Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats."
  ]
  assert_equal(comments, entry.cc['POLYMORPHISM'])
end
744
+
745
# PTM: chemical alteration of a polypeptide (proteolytic cleavage, amino
# acid modifications including crosslinks). The topic complements the
# feature table, or records modifications for which position-specific
# data is not available.
def test_ptm
  lines = [
    'CC -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked',
    'CC carbohydrate.',
    'CC -!- PTM: Palmitoylated.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  modifications = [
    "N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.",
    "Palmitoylated."
  ]
  assert_equal(modifications, entry.cc['PTM'])
end
755
+
756
# RNA EDITING: any type of RNA editing that leads to one or more amino
# acid changes. Checks the raw joined text and the structured hash.
def test_rna_editing
  # A second sample used to be assigned to +data+ here and was overwritten
  # before use, so it was never parsed. It is kept as reference material
  # for a future test case:
  #   CC -!- RNA EDITING: Modified_positions=50, 59, 78, 87, 104, 132, 139,
  #   CC 146, 149, 160, 170, 177, 185, 198, 208, 223, 226, 228, 243, 246,
  #   CC 252, 260, 264, 277, 285, 295; Note=The nonsense codons at
  #   CC positions 50, 78, 104, 260 and 264 are modified to sense codons.
  data = 'CC -!- RNA EDITING: Modified_positions=607; Note=Fully edited in the
CC brain. Heteromerically expressed edited GLUR2 (R) receptor
CC complexes are impermeable to calcium, whereas the unedited (Q)
CC forms are highly permeable to divalent ions (By similarity).'
  sp = Bio::SPTR.new(data)
  assert_equal(["Modified_positions=607; Note=Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."],
               sp.cc['RNA EDITING'])
  assert_equal({"Modified_positions" => ['607'],
                "Note" => "Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."},
               sp.cc('RNA EDITING'))
end
774
+
775
# SIMILARITY: sequence or structural similarities of a protein with other
# proteins. Two SIMILARITY comments are expected as a two-element array.
def test_similarity
  lines = [
    'CC -!- SIMILARITY: Contains 1 protein kinase domain.',
    'CC -!- SIMILARITY: Contains 1 RGS domain.'
  ]
  entry = Bio::SPTR.new(lines.join("\n"))
  similarities = [
    "Contains 1 protein kinase domain.",
    "Contains 1 RGS domain."
  ]
  assert_equal(similarities, entry.cc['SIMILARITY'])
end
783
+
784
# SUBCELLULAR LOCATION: subcellular location of the mature protein.
# Checks the raw joined text and the structured form, one list per
# sentence, split on ';'.
def test_subcellular_location
  # Five further samples used to be assigned to +data+ here, each
  # overwritten by the next, so none of them was ever parsed. They are
  # kept as reference material for future test cases:
  #
  #   CC -!- SUBCELLULAR LOCATION: Or: Cytoplasm. Or: Secreted protein. May be
  #   CC secreted by a non-classical secretory pathway.
  #
  #   CC -!- SUBCELLULAR LOCATION: Cytoplasmic or may be secreted by a non-
  #   CC classical secretory pathway (By similarity).
  #
  #   CC -!- SUBCELLULAR LOCATION: Cytoplasm. In neurons, axonally transported
  #   CC to the nerve terminals.
  #
  #   CC -!- SUBCELLULAR LOCATION: Cell wall. Probably the external side of the
  #   CC cell wall.
  #
  #   CC -!- SUBCELLULAR LOCATION: Endosome; late endosome; late endosomal
  #   CC membrane; single-pass type I membrane protein. Lysosome; lysosomal
  #   CC membrane; single-pass type I membrane protein. Localizes to late
  #   CC endocytic compartment. Associates with lysosome membranes.
  data = "CC -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane;
CC peripheral membrane protein. Plastid; chloroplast; chloroplast
CC stroma."
  sp = Bio::SPTR.new(data)
  assert_equal(["Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma."],
               sp.cc['SUBCELLULAR LOCATION'])
  assert_equal([["Plastid",
                 "chloroplast",
                 "chloroplast membrane",
                 "peripheral membrane protein"],
                ["Plastid", "chloroplast",
                 "chloroplast stroma"]],
               sp.cc('SUBCELLULAR LOCATION'))
end
819
+
820
# SUBUNIT: quaternary structure of a protein and any kind of interaction
# with other proteins or protein complexes, except receptor-ligand
# interactions, which are described in the topic FUNCTION.
def test_subunit
  # A second sample used to be assigned to +data+ here and was overwritten
  # before use, so it was never parsed. It is kept as reference material
  # for a future test case:
  #   CC -!- SUBUNIT: Interacts with BTK. Interacts with all isoforms of MAPK8,
  #   CC MAPK9, MAPK10 and MAPK12.
  data = 'CC -!- SUBUNIT: Homotetramer.'
  sp = Bio::SPTR.new(data)
  assert_equal(["Homotetramer."],
               sp.cc['SUBUNIT'])
end
831
+
832
# TISSUE SPECIFICITY: tissue-specific expression of mRNA or protein.
def test_tissue_specificity
  # Two further samples used to be assigned to +data+ here and were
  # overwritten before use, so they were never parsed. They are kept as
  # reference material for future test cases:
  #
  #   CC -!- TISSUE SPECIFICITY: Heart, brain and liver mitochondria.
  #
  #   CC -!- TISSUE SPECIFICITY: Widely expressed with highest expression in
  #   CC thymus, testis, embryo and proliferating blood lymphocytes.
  data = "CC -!- TISSUE SPECIFICITY: Isoform 2 is highly expressed in the brain,
CC heart, spleen, kidney and blood. Isoform 2 is expressed (at
CC protein level) in the spleen, skeletal muscle and gastrointestinal
CC epithelia."
  sp = Bio::SPTR.new(data)
  assert_equal(["Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia."],
               sp.cc['TISSUE SPECIFICITY'])
end
847
+
848
def test_toxic_dose
  # TOXIC DOSE: description of the lethal dose (LD), paralytic dose (PD) or
  # effective dose of a protein.
  entry = Bio::SPTR.new('CC -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.')
  expected = ["LD(50) is 12 mg/kg by intraperitoneal injection."]
  assert_equal(expected, entry.cc['TOXIC DOSE'])
end
855
+
856
def test_web_resource
  # WEB RESOURCE: description of a cross-reference to a network
  # database/resource for a specific protein; see 3.21.34
  flat_file = ['CC -!- WEB RESOURCE: NAME=Inherited peripheral neuropathies mutation db;',
               'CC URL="http://www.molgen.ua.ac.be/CMTMutations/".',
               'CC -!- WEB RESOURCE: NAME=Connexin-deafness homepage;',
               'CC URL="http://www.crg.es/deafness/".',
               'CC -!- WEB RESOURCE: NAME=GeneReviews;',
               'CC URL="http://www.genetests.org/query?gene=GJB1".'].join("\n")
  entry = Bio::SPTR.new(flat_file)

  # Hash-style access (cc[...]) is expected to give the raw comment text.
  raw_comments = [
    'NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".',
    'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".',
    'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".'
  ]
  assert_equal(raw_comments, entry.cc['WEB RESOURCE'])

  # Passing the topic as an argument is expected to give parsed fields.
  parsed_comments = [
    {'NAME' => "Inherited peripheral neuropathies mutation db",
     'URL' => 'http://www.molgen.ua.ac.be/CMTMutations/',
     'NOTE' => nil},
    {'NAME' => "Connexin-deafness homepage",
     'URL' => 'http://www.crg.es/deafness/',
     'NOTE' => nil},
    {'NAME' => "GeneReviews",
     'URL' => 'http://www.genetests.org/query?gene=GJB1',
     'NOTE' => nil}
  ]
  assert_equal(parsed_comments, entry.cc('WEB RESOURCE'))
end
878
+
879
+ end # class TestSPTRCC
880
+
881
+ # http://br.expasy.org/sprot/userman.html#Ref_line
882
class TestSPTRRef < Test::Unit::TestCase

  # Parses one reference block (RN, RP, RC, RX, RG, RA, RT and RL lines)
  # that every test in this class inspects through @obj.ref.first.
  def setup
    reference_lines = [
      'RN [1]',
      'RP NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C), FUNCTION, INTERACTION',
      'RP WITH PKC-3, SUBCELLULAR LOCATION, TISSUE SPECIFICITY, DEVELOPMENTAL',
      'RP STAGE, AND MUTAGENESIS OF PHE-175 AND PHE-221.',
      'RC STRAIN=Bristol N2;',
      'RX PubMed=11134024; DOI=10.1074/jbc.M008990200;',
      'RG The mouse genome sequencing consortium;',
      'RA Galinier A., Bleicher F., Negre D., Perriere G., Duclos B.,',
      'RA Cozzone A.J., Cortay J.-C.;',
      'RT "A novel adapter protein employs a phosphotyrosine binding domain and',
      'RT exceptionally basic N-terminal domains to capture and localize an',
      'RT atypical protein kinase C: characterization of Caenorhabditis elegans',
      'RT C kinase adapter 1, a protein that avidly binds protein kinase C3.";',
      'RL J. Biol. Chem. 276:10463-10475(2001).'
    ]
    @obj = SPTR.new(reference_lines.join("\n"))
  end

  # The whole parsed reference, compared as a single hash.
  def test_ref
    expected = {
      "RN" => "[1]",
      "RP" => ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
               "FUNCTION",
               "INTERACTION WITH PKC-3",
               "SUBCELLULAR LOCATION",
               "TISSUE SPECIFICITY",
               "DEVELOPMENTAL STAGE",
               "MUTAGENESIS OF PHE-175 AND PHE-221"],
      "RC" => [{"Text" => "Bristol N2", "Token" => "STRAIN"}],
      "RX" => {"MEDLINE" => nil,
               "DOI" => "10.1074/jbc.M008990200",
               "PubMed" => "11134024"},
      "RG" => ["The mouse genome sequencing consortium"],
      "RA" => "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C.",
      "RT" => "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3.",
      "RL" => "J. Biol. Chem. 276:10463-10475(2001)."
    }
    assert_equal(expected, @obj.ref.first)
  end

  def test_RN
    reference = @obj.ref.first
    assert_equal("[1]", reference['RN'])
  end

  def test_RP
    expected = ["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORMS A AND C)",
                "FUNCTION",
                "INTERACTION WITH PKC-3",
                "SUBCELLULAR LOCATION",
                "TISSUE SPECIFICITY",
                "DEVELOPMENTAL STAGE",
                "MUTAGENESIS OF PHE-175 AND PHE-221"]
    assert_equal(expected, @obj.ref.first['RP'])
  end

  def test_RC
    expected = [{"Text" => "Bristol N2", "Token" => "STRAIN"}]
    assert_equal(expected, @obj.ref.first['RC'])
  end

  def test_RX
    expected = {'MEDLINE' => nil,
                'PubMed' => '11134024',
                'DOI' => '10.1074/jbc.M008990200'}
    assert_equal(expected, @obj.ref.first['RX'])
  end

  def test_RG
    expected = ["The mouse genome sequencing consortium"]
    assert_equal(expected, @obj.ref.first['RG'])
  end

  def test_RA
    expected = "Galinier A., Bleicher F., Negre D., Perriere G., Duclos B., Cozzone A.J., Cortay J.-C."
    assert_equal(expected, @obj.ref.first['RA'])
  end

  def test_RT
    expected = "A novel adapter protein employs a phosphotyrosine binding domain and exceptionally basic N-terminal domains to capture and localize an atypical protein kinase C: characterization of Caenorhabditis elegans C kinase adapter 1, a protein that avidly binds protein kinase C3."
    assert_equal(expected, @obj.ref.first['RT'])
  end

  def test_RL
    expected = "J. Biol. Chem. 276:10463-10475(2001)."
    assert_equal(expected, @obj.ref.first['RL'])
  end

end # class TestSPTRRef
969
+
970
+
971
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0
972
class TestSPTRSwissProtRel41_0 < Test::Unit::TestCase
  # Progress in the conversion of Swiss-Prot to mixed-case characters

  # Multiple RP lines: a topic list wrapped over two RP lines is expected
  # to come back as one array element per topic.
  def test_multiple_RP_lines
    flat_file = ['RN [1]',
                 'RP SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND',
                 'RP CHARACTERIZATION.'].join("\n")
    topics = SPTR.new(flat_file).ref.first['RP']
    expected = ['SEQUENCE FROM N.A.',
                'SEQUENCE OF 23-42 AND 351-365',
                'CHARACTERIZATION']
    assert_equal(expected, topics)
  end
end
987
+
988
+
989
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1
990
class TestSPTRSwissProtRel41_1 < Test::Unit::TestCase
  # New syntax of the CC line topic ALTERNATIVE PRODUCTS
  def test_alternative_products
    flat_file = [
      'ID TEST_ENTRY STANDARD; PRT; 393 AA.',
      'CC -!- ALTERNATIVE PRODUCTS:',
      'CC Event=Alternative promoter;',
      'CC Comment=Free text;',
      'CC Event=Alternative splicing; Named isoforms=2;',
      'CC Comment=Optional free text;',
      'CC Name=Isoform_1; Synonyms=Synonym_1;',
      'CC IsoId=Isoform_identifier_1;',
      'CC Sequence=Displayed;',
      'CC Note=Free text;',
      'CC Name=Isoform_2; Synonyms=Synonym_1, Synonym_2;',
      'CC IsoId=Isoform_identifier_1, Isoform_identifer_2;',
      'CC Sequence=VSP_identifier_1, VSP_identifier_2;',
      'CC Note=Free text;',
      'CC Event=Alternative initiation;',
      'CC Comment=Free text;'
    ].join("\n")
    entry = SPTR.new(flat_file)

    isoform_1 = {"Name" => "Isoform_1",
                 "Synonyms" => ["Synonym_1"],
                 "IsoId" => ["Isoform_identifier_1"],
                 "Sequence" => ["Displayed"]}
    isoform_2 = {"Name" => "Isoform_2",
                 "Synonyms" => ["Synonym_1", "Synonym_2"],
                 "IsoId" => ["Isoform_identifier_1", "Isoform_identifer_2"],
                 "Sequence" => ["VSP_identifier_1", "VSP_identifier_2"]}
    expected = {"Event" => ["Alternative promoter"],
                "Comment" => "Free text",
                "Named isoforms" => "2",
                "Variants" => [isoform_1, isoform_2]}
    assert_equal(expected, entry.cc('ALTERNATIVE PRODUCTS'))
  end

  # Same CC topic combined with the VARSPLIC feature lines describing the
  # isoforms; both the comment and the feature table are checked.
  def test_alternative_products_with_ft
    flat_file = [
      'ID TEST_ENTRY STANDARD; PRT; 393 AA.',
      'CC -!- ALTERNATIVE PRODUCTS:',
      'CC Event=Alternative splicing; Named isoforms=6;',
      'CC Name=1;',
      'CC IsoId=Q15746-4; Sequence=Displayed;',
      'CC Name=2;',
      'CC IsoId=Q15746-5; Sequence=VSP_000040;',
      'CC Name=3A;',
      'CC IsoId=Q15746-6; Sequence=VSP_000041, VSP_000043;',
      'CC Name=3B;',
      'CC IsoId=Q15746-7; Sequence=VSP_000040, VSP_000041, VSP_000042;',
      'CC Name=4;',
      'CC IsoId=Q15746-8; Sequence=VSP_000041, VSP_000042;',
      'CC Name=del-1790;',
      'CC IsoId=Q15746-9; Sequence=VSP_000044;',
      'FT VARSPLIC 437 506 VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA',
      'FT RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in',
      'FT isoform 2 and isoform 3B).',
      'FT /FTId=VSP_004791.',
      'FT VARSPLIC 1433 1439 DEVEVSD -> MKWRCQT (in isoform 3A,',
      'FT isoform 3B and isoform 4).',
      'FT /FTId=VSP_004792.',
      'FT VARSPLIC 1473 1545 Missing (in isoform 4).',
      'FT /FTId=VSP_004793.',
      'FT VARSPLIC 1655 1705 Missing (in isoform 3A and isoform 3B).',
      'FT /FTId=VSP_004794.',
      'FT VARSPLIC 1790 1790 Missing (in isoform Del-1790).',
      'FT /FTId=VSP_004795.'
    ].join("\n")
    entry = SPTR.new(flat_file)

    expected_products = {
      "Event" => ["Alternative splicing"],
      "Comment" => "",
      "Named isoforms" => "6",
      "Variants" => [
        {"Name" => "1", "IsoId" => ["Q15746-4"], "Synonyms" => [],
         "Sequence" => ["Displayed"]},
        {"Name" => "2", "IsoId" => ["Q15746-5"], "Synonyms" => [],
         "Sequence" => ["VSP_000040"]},
        {"Name" => "3A", "IsoId" => ["Q15746-6"], "Synonyms" => [],
         "Sequence" => ["VSP_000041", "VSP_000043"]},
        {"Name" => "3B", "IsoId" => ["Q15746-7"], "Synonyms" => [],
         "Sequence" => ["VSP_000040", "VSP_000041", "VSP_000042"]},
        {"Name" => "4", "IsoId" => ["Q15746-8"], "Synonyms" => [],
         "Sequence" => ["VSP_000041", "VSP_000042"]},
        {"Name" => "del-1790", "IsoId" => ["Q15746-9"], "Synonyms" => [],
         "Sequence" => ["VSP_000044"]}
      ]
    }
    assert_equal(expected_products, entry.cc('ALTERNATIVE PRODUCTS'))

    expected_features = [
      {"FTId" => "VSP_004791",
       "From" => 437,
       "To" => 506,
       "Description" => "VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).",
       "diff" => ["VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKARTRDSGTYSCTASNAQGQVSCSWTLQVER", "G"],
       "original" => ["VARSPLIC", "437", "506", "VSGIPKPEVAWFLEGTPVRRQEGSIEVYEDAGSHYLCLLKA RTRDSGTYSCTASNAQGQVSCSWTLQVER -> G (in isoform 2 and isoform 3B).", "/FTId=VSP_004791."]},
      {"FTId" => "VSP_004792",
       "From" => 1433,
       "To" => 1439,
       "Description" => "DEVEVSD -> MKWRCQT (in isoform 3A, isoform 3B and isoform 4).",
       "diff" => ["DEVEVSD", "MKWRCQT"],
       "original" => ["VARSPLIC", "1433", "1439", "DEVEVSD -> MKWRCQT (in isoform 3A, isoform 3B and isoform 4).", "/FTId=VSP_004792."]},
      {"FTId" => "VSP_004793",
       "From" => 1473,
       "To" => 1545,
       "Description" => "Missing (in isoform 4).",
       "diff" => [nil, nil],
       "original" => ["VARSPLIC", "1473", "1545", "Missing (in isoform 4).", "/FTId=VSP_004793."]},
      {"FTId" => "VSP_004794",
       "From" => 1655,
       "To" => 1705,
       "Description" => "Missing (in isoform 3A and isoform 3B).",
       "diff" => [nil, nil],
       "original" => ["VARSPLIC", "1655", "1705", "Missing (in isoform 3A and isoform 3B).", "/FTId=VSP_004794."]},
      {"FTId" => "VSP_004795",
       "From" => 1790,
       "To" => 1790,
       "Description" => "Missing (in isoform Del-1790).",
       "diff" => [nil, nil],
       "original" => ["VARSPLIC", "1790", "1790", "Missing (in isoform Del-1790).", "/FTId=VSP_004795."]}
    ]
    assert_equal(expected_features, entry.ft['VARSPLIC'])
  end
end
1116
+
1117
+
1118
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.10
1119
class TestSPTRSwissProtRel41_10 < Test::Unit::TestCase
  # Reference Comment (RC) line topics may span lines.
  # One STRAIN token listing 22 strains over three RC lines is expected to
  # yield one {"Text", "Token"} hash per strain, in input order.
  def test_RC_lines
    flat_file = [
      'RN [1]',
      'RC STRAIN=AZ.026, DC.005, GA.039, GA2181, IL.014, IN.018, KY.172, KY2.37,',
      'RC LA.013, MN.001, MNb027, MS.040, NY.016, OH.036, TN.173, TN2.38,',
      'RC UT.002, AL.012, AZ.180, MI.035, VA.015, and IL2.17;'
    ].join("\n")
    strains = %w(AZ.026 DC.005 GA.039 GA2181 IL.014 IN.018 KY.172 KY2.37
                 LA.013 MN.001 MNb027 MS.040 NY.016 OH.036 TN.173 TN2.38
                 UT.002 AL.012 AZ.180 MI.035 VA.015 IL2.17)
    expected = strains.map { |name| {"Text" => name, "Token" => "STRAIN"} }
    entry = SPTR.new(flat_file)
    assert_equal(expected, entry.ref.first['RC'])
  end
end
1152
+
1153
+
1154
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.20
1155
class TestSPTRSwissProtRel41_20 < Test::Unit::TestCase
  # Case and wording change for submissions to Swiss-Prot in reference
  # location (RL) lines. The assertion expects an empty RL value for an RL
  # line that is not preceded by an RN line.
  def test_RL_lines
    entry = SPTR.new("RL Submitted (MAY-2002) to the SWISS-PROT data bank.")
    assert_equal('', entry.ref.first['RL'])
  end

  # New comment line (CC) topic ALLERGEN
  def test_CC_allergen
    flat_file = [
      "CC -!- ALLERGEN: Causes an allergic reaction in human. Binds IgE. It is a",
      "CC partially heat-labile allergen that may cause both respiratory and",
      "CC food-allergy symptoms in patients with the bird-egg syndrome."
    ].join("\n")
    expected = ["Causes an allergic reaction in human. Binds IgE. It is a partially heat-labile allergen that may cause both respiratory and food-allergy symptoms in patients with the bird-egg syndrome."]
    entry = SPTR.new(flat_file)
    assert_equal(expected, entry.cc("ALLERGEN"))
  end
end
1174
+
1175
+
1176
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel42.6
1177
class TestSPTRSwissProtRel42_6 < Test::Unit::TestCase
  # New comment line (CC) topic RNA EDITING.
  # Three shapes are checked in turn: positions only, positions plus a
  # wrapped Note, and the "Not_applicable" placeholder plus a Note.
  def test_CC_rna_editing
    positions_only = "CC -!- RNA EDITING: Modified_positions=393, 431, 452, 495."
    entry = SPTR.new(positions_only)
    expected = {"Modified_positions" => ['393', '431', '452', '495'],
                "Note" => ""}
    assert_equal(expected, entry.cc("RNA EDITING"))

    positions_with_note = [
      "CC -!- RNA EDITING: Modified_positions=59, 78, 94, 98, 102, 121; Note=The",
      "CC stop codon at position 121 is created by RNA editing. The nonsense",
      "CC codon at position 59 is modified to a sense codon."
    ].join("\n")
    entry = SPTR.new(positions_with_note)
    expected = {"Modified_positions" => ['59', '78', '94', '98', '102', '121'],
                "Note" => "The stop codon at position 121 is created by RNA editing. The nonsense codon at position 59 is modified to a sense codon."}
    assert_equal(expected, entry.cc("RNA EDITING"))

    not_applicable = [
      "CC -!- RNA EDITING: Modified_positions=Not_applicable; Note=Some",
      "CC positions are modified by RNA editing via nucleotide insertion or",
      "CC deletion. The initiator methionine is created by RNA editing."
    ].join("\n")
    entry = SPTR.new(not_applicable)
    expected = {'Modified_positions' => ['Not_applicable'],
                'Note' => "Some positions are modified by RNA editing via nucleotide insertion or deletion. The initiator methionine is created by RNA editing."}
    assert_equal(expected, entry.cc("RNA EDITING"))
  end
end
1203
+
1204
+
1205
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel1_12
1206
class TestSPTRUniProtRel1_12 < Test::Unit::TestCase
  # Digital Object Identifier (DOI) in the RX line
  def test_DOI_in_RX_line
    # RX [MEDLINE=Medline_identifier; ][PubMed=Pubmed_identifier; ][DOI=Digital_object_identifier;]
    # The fixture starts with an empty line, hence the leading "".
    flat_file = ["",
                 "RN [1]",
                 "RX MEDLINE=97291283; PubMed=9145897; DOI=10.1007/s00248-002-2038-4;"].join("\n")
    cross_refs = SPTR.new(flat_file).ref.first['RX']
    expected = {'MEDLINE' => '97291283',
                'PubMed' => '9145897',
                'DOI' => '10.1007/s00248-002-2038-4'}
    assert_equal(expected, cross_refs)
  end

  # New line type: RG (Reference Group)
  def test_RG_line
    # The fixture starts with an empty line, hence the leading "".
    flat_file = ["",
                 "RN [1]",
                 "RG The C. elegans sequencing consortium;",
                 "RG The Brazilian network for HIV isolation and characterization;"].join("\n")
    groups = SPTR.new(flat_file).ref.first['RG']
    expected = ['The C. elegans sequencing consortium',
                'The Brazilian network for HIV isolation and characterization']
    assert_equal(expected, groups)
  end
end
1232
+
1233
+
1234
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_0
1235
class TestSPTRUniProtRel2_0 < Test::Unit::TestCase
  # New format for the GN (Gene Name) line
  # GN Name=<name>; Synonyms=<name1>[, <name2>...]; OrderedLocusNames=<name1>[, <name2>...];
  # GN ORFNames=<name1>[, <name2>...];
  def test_GN_line
    # One gene with name, synonyms and ordered locus names.
    single_gene = ["GN Name=atpG; Synonyms=uncG, papC;",
                   "GN OrderedLocusNames=b3733, c4659, z5231, ECs4675, SF3813, S3955;"].join("\n")
    entry = SPTR.new(single_gene)
    expected = [{:name => "atpG",
                 :synonyms => ["uncG", "papC"],
                 :loci => ["b3733", "c4659", "z5231", "ECs4675", "SF3813", "S3955"],
                 :orfs => []}]
    assert_equal(expected, entry.gn)

    # ORF name only: the other fields are expected to be empty.
    orf_only = "GN ORFNames=SPAC1834.11c;"
    entry = SPTR.new(orf_only)
    expected = [{:name => '',
                 :synonyms => [],
                 :loci => [],
                 :orfs => ['SPAC1834.11c']}]
    assert_equal(expected, entry.gn)

    # Two genes separated by a "GN and" line.
    two_genes = ["GN Name=cysA1; Synonyms=cysA; OrderedLocusNames=Rv3117, MT3199;",
                 "GN ORFNames=MTCY164.27;",
                 "GN and",
                 "GN Name=cysA2; OrderedLocusNames=Rv0815c, MT0837; ORFNames=MTV043.07c;"].join("\n")
    entry = SPTR.new(two_genes)
    expected = [{:name => "cysA1",
                 :synonyms => ["cysA"],
                 :loci => ["Rv3117", "MT3199"],
                 :orfs => ["MTCY164.27"]},
                {:name => "cysA2",
                 :synonyms => [],
                 :loci => ["Rv0815c", "MT0837"],
                 :orfs => ["MTV043.07c"]}]
    assert_equal(expected, entry.gn)
  end
end
1273
+
1274
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_1
1275
class TestSPTRUniProtRel2_1 < Test::Unit::TestCase
  # Format change in the comment line (CC) topic: MASS SPECTROMETRY.
  # The assertion expects the isoform remark after RANGE to be dropped and
  # MW_ERR to be nil when the comment carries no such field.
  def test_CC_mass_spectrometry
    flat_file = ["CC -!- MASS SPECTROMETRY: MW=32875.93; METHOD=MALDI;",
                 "CC RANGE=1-284 (Isoform 3); NOTE=Ref.6."].join("\n")
    expected = [{"MW" => "32875.93",
                 "MW_ERR" => nil,
                 "METHOD" => "MALDI",
                 "RANGE" => "1-284",
                 "NOTE" => "Ref.6"}]
    entry = SPTR.new(flat_file)
    assert_equal(expected, entry.cc("MASS SPECTROMETRY"))
  end
end
1289
+
1290
+
1291
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_3
1292
class TestSPTRUniProtRel2_3 < Test::Unit::TestCase
  # New RL line structure for electronic publications.
  # The assertion expects an empty RL value for an RL line that is not
  # preceded by an RN line.
  def test_RL_line
    entry = SPTR.new("RL Submitted (XXX-YYYY) to the HIV data bank.")
    assert_equal('', entry.ref.first['RL'])
  end

  # Format change in the cross-reference to PDB
  def test_DR_PDB
    entry = SPTR.new("DR PDB; 1NB3; X-ray; A/B/C/D=116-335, P/R/S/T=98-105.")
    expected = [["1NB3", "X-ray", "A/B/C/D=116-335, P/R/S/T=98-105"]]
    assert_equal(expected, entry.dr['PDB'])
  end
end
1309
+
1310
+
1311
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_4
1312
class TestSPTRUniProtRel3_4 < Test::Unit::TestCase
  # Changes in the RP (Reference Position) line
  def test_RP_line
    # Topics wrapped over three RP lines; the fixture starts with an empty
    # line, hence the leading "".
    wrapped = ["",
               "RN [1]",
               "RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1), PROTEIN SEQUENCE",
               "RP OF 108-131; 220-231 AND 349-393, CHARACTERIZATION, AND MUTAGENESIS OF",
               "RP ARG-336."].join("\n")
    entry = SPTR.new(wrapped)
    expected = ['NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1)',
                'PROTEIN SEQUENCE OF 108-131; 220-231 AND 349-393',
                'CHARACTERIZATION',
                'MUTAGENESIS OF ARG-336']
    assert_equal(expected, entry.ref.first['RP'])

    # A single topic on one RP line.
    single = ["",
              "RN [1]",
              "RP NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]."].join("\n")
    entry = SPTR.new(single)
    expected = ['NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]']
    assert_equal(expected, entry.ref.first['RP'])
  end


  # New comment line (CC) topic: BIOPHYSICOCHEMICAL PROPERTIES.
  # Each case supplies some of the five sub-topics; the assertions expect
  # the absent ones to come back as "" or {}.
  def test_CC_biophysiochemical_properties
    absorption_only = [
      "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
      "CC Absorption:",
      "CC Abs(max)=395 nm;",
      "CC Note=Exhibits a smaller absorbance peak at 470 nm. The",
      "CC fluorescence emission spectrum peaks at 509 nm with a shoulder",
      "CC at 540 nm;"
    ].join("\n")
    entry = SPTR.new(absorption_only)
    expected = {
      "Absorption" => {"Abs(max)" => "395 nm",
                       "Note" => "Exhibits a smaller absorbance peak at 470 nm. The fluorescence emission spectrum peaks at 509 nm with a shoulder at 540 nm"},
      "Kinetic parameters" => {},
      "pH dependence" => "",
      "Redox potential" => "",
      "Temperature dependence" => ""
    }
    assert_equal(expected, entry.cc("BIOPHYSICOCHEMICAL PROPERTIES"))

    kinetics_only = [
      "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
      "CC Kinetic parameters:",
      "CC KM=62 mM for glucose;",
      "CC KM=90 mM for maltose;",
      "CC Vmax=0.20 mmol/min/mg enzyme with glucose as substrate;",
      "CC Vmax=0.11 mmol/min/mg enzyme with maltose as substrate;",
      "CC Note=Acetylates glucose, maltose, mannose, galactose, and",
      "CC fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07,",
      "CC 0.04;"
    ].join("\n")
    entry = SPTR.new(kinetics_only)
    expected = {
      "Absorption" => {},
      "Kinetic parameters" => {"KM" => "62 mM for glucose; KM=90 mM for maltose",
                               "Vmax" => "0.20 mmol/min/mg enzyme with glucose as substrate",
                               "Note" => "Acetylates glucose, maltose, mannose, galactose, and fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07, 0.04"},
      "pH dependence" => "",
      "Redox potential" => "",
      "Temperature dependence" => ""
    }
    assert_equal(expected, entry.cc("BIOPHYSICOCHEMICAL PROPERTIES"))

    several_topics = [
      "CC -!- BIOPHYSICOCHEMICAL PROPERTIES:",
      "CC Kinetic parameters:",
      "CC KM=1.76 uM for chlorophyll;",
      "CC pH dependence:",
      "CC Optimum pH is 7.5. Active from pH 5.0 to 9.0;",
      "CC Temperature dependence:",
      "CC Optimum temperature is 45 degrees Celsius. Active from 30 to 60",
      "CC degrees Celsius;"
    ].join("\n")
    entry = SPTR.new(several_topics)
    expected = {
      "Absorption" => {},
      "Kinetic parameters" => {},
      "pH dependence" => "Optimum pH is 7.5. Active from pH 5.0 to 9.0",
      "Redox potential" => "",
      "Temperature dependence" => "Optimum temperature is 45 degrees Celsius. Active from 30 to 60 degrees Celsius"
    }
    assert_equal(expected, entry.cc("BIOPHYSICOCHEMICAL PROPERTIES"))
  end
end
1389
+
1390
+
1391
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_5
1392
+ class TestSPTRUniProtRel3_5 < Test::Unit::TestCase
1393
+ # Extension of the Swiss-Prot entry name format
1394
+ def test_entry_name_format
1395
+ # TODO: placeholder -- no assertions have been written for this change yet.
1396
+ end
1397
+ end
1398
+
1399
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel4_0
1400
class TestSPTRUniProtRel4_0 < Test::Unit::TestCase
  # Extension of the TrEMBL entry name format

  # Change of the entry name in many Swiss-Prot entries

  # New comment line (CC) topic: INTERACTION
  def test_CC_interaction
    flat_file = ["CC -!- INTERACTION:",
                 "CC P11450:fcp3c; NbExp=1; IntAct=EBI-126914, EBI-159556;"].join("\n")
    expected = [interaction_record("P11450", "fcp3c", nil,
                                   ["EBI-126914", "EBI-159556"])]
    assert_equal(expected, SPTR.new(flat_file).cc("INTERACTION"))
  end

  # The accession may carry an isoform suffix ("-1").
  def test_CC_interaction_isoform
    flat_file = ["CC -!- INTERACTION:",
                 "CC Q9W1K5-1:cg11299; NbExp=1; IntAct=EBI-133844, EBI-212772;"].join("\n")
    expected = [interaction_record('Q9W1K5-1', 'cg11299', nil,
                                   ["EBI-133844", "EBI-212772"])]
    assert_equal(expected, SPTR.new(flat_file).cc("INTERACTION"))
  end

  # A partner without a gene name is written as "-".
  def test_CC_interaction_no_gene_name
    flat_file = ["CC -!- INTERACTION:",
                 "CC Q8NI08:-; NbExp=1; IntAct=EBI-80809, EBI-80799;"].join("\n")
    expected = [interaction_record('Q8NI08', '-', nil,
                                   ["EBI-80809", "EBI-80799"])]
    assert_equal(expected, SPTR.new(flat_file).cc("INTERACTION"))
  end

  # "Self" is expected to be reported under the entry's own name from the
  # ID line.
  def test_CC_interaction_self_association
    flat_file = ["ID TEST_ENTRY STANDARD; PRT; 393 AA.",
                 "CC -!- INTERACTION:",
                 "CC Self; NbExp=1; IntAct=EBI-123485, EBI-123485;"].join("\n")
    expected = [interaction_record('TEST_ENTRY', 'TEST_ENTRY', nil,
                                   ["EBI-123485", "EBI-123485"])]
    assert_equal(expected, SPTR.new(flat_file).cc("INTERACTION"))
  end

  # The "(xeno)" marker is expected in the optional_identifier field.
  def test_CC_interaction_The_source_organisms_of_the_interacting_proteins_are_different
    flat_file = ["CC -!- INTERACTION:",
                 "CC Q8C1S0:2410018m14rik (xeno); NbExp=1; IntAct=EBI-394562, EBI-398761;"].join("\n")
    expected = [interaction_record('Q8C1S0', '2410018m14rik', '(xeno)',
                                   ["EBI-394562", "EBI-398761"])]
    assert_equal(expected, SPTR.new(flat_file).cc("INTERACTION"))
  end

  # Two lines naming the same partner are expected to stay two records.
  def test_CC_interaction_Different_isoforms_of_the_current_protein_are_shown_to_interact_with_the_same_protein
    flat_file = ["CC -!- INTERACTION:",
                 "CC P51617:irak1; NbExp=1; IntAct=EBI-448466, EBI-358664;",
                 "CC P51617:irak1; NbExp=1; IntAct=EBI-448472, EBI-358664;"].join("\n")
    expected = [interaction_record("P51617", "irak1", nil,
                                   ["EBI-448466", "EBI-358664"]),
                interaction_record("P51617", "irak1", nil,
                                   ["EBI-448472", "EBI-358664"])]
    assert_equal(expected, SPTR.new(flat_file).cc("INTERACTION"))
  end

  private

  # Builds one expected INTERACTION record; every fixture in this class
  # uses NbExp=1.
  def interaction_record(sp_ac, identifier, optional_identifier, int_act)
    {"SP_Ac" => sp_ac,
     "identifier" => identifier,
     "optional_identifier" => optional_identifier,
     "NbExp" => "1",
     "IntAct" => int_act}
  end
end
1485
+
1486
+
1487
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_0
1488
class TestSPTRUniProtRel5_0 < Test::Unit::TestCase
  # Format change in the DR line
  # DR DATABASE_IDENTIFIER; PRIMARY_IDENTIFIER; SECONDARY_IDENTIFIER[; TERTIARY_IDENTIFIER][; QUATERNARY_IDENTIFIER].
  def test_DR_line
    # The fixture starts with an empty line, hence the leading "".
    flat_file = ["",
                 "DR EMBL; M68939; AAA26107.1; -; Genomic_DNA.",
                 "DR EMBL; U56386; AAB72034.1; -; mRNA."].join("\n")
    entry = SPTR.new(flat_file)

    # Hash-style access: one array of field values per DR line.
    expected_fields = [["M68939", "AAA26107.1", "-", "Genomic_DNA"],
                       ["U56386", "AAB72034.1", "-", "mRNA"]]
    assert_equal(expected_fields, entry.dr['EMBL'])

    # Argument access: the same values keyed by field name (the third
    # field is keyed by a single space).
    expected_records = [{"Accession" => "M68939",
                         "Version" => "AAA26107.1",
                         " " => "-",
                         "Molecular Type" => "Genomic_DNA"},
                        {"Accession" => "U56386",
                         "Version" => "AAB72034.1",
                         " " => "-",
                         "Molecular Type" => "mRNA"}]
    assert_equal(expected_records, entry.dr('EMBL'))
  end
  # New feature (FT) keys and redefinition of existing FT keys
end
1514
+
1515
+
1516
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_4
1517
class TestSPTRUniProtRel5_4 < Test::Unit::TestCase
  # Multiple comment line (CC) topics COFACTOR.
  # Both access styles (cc[...] and cc(...)) are expected to return the two
  # comments in input order.
  def test_multiple_cofactors
    flat_file = ["CC -!- COFACTOR: Binds 1 2Fe-2S cluster per subunit (By similarity).",
                 "CC -!- COFACTOR: Binds 1 Fe(2+) ion per subunit (By similarity)."].join("\n")
    entry = SPTR.new(flat_file)
    assert_equal(["Binds 1 2Fe-2S cluster per subunit (By similarity).",
                  "Binds 1 Fe(2+) ion per subunit (By similarity)."],
                 entry.cc['COFACTOR'])
    assert_equal(["Binds 1 2Fe-2S cluster per subunit (By similarity).",
                  "Binds 1 Fe(2+) ion per subunit (By similarity)."],
                 entry.cc('COFACTOR'))
  end
end
1531
+
1532
+
1533
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_0
1534
class TestSPTRUniProtRel6_0 < Test::Unit::TestCase
  # Changes in the OG (OrGanelle) line.
  # Each OG line is parsed on its own and compared with the expected list
  # of organelle terms.
  def test_OG_line
    cases = [
      ["OG Plastid.", ['Plastid']],
      ["OG Plastid; Apicoplast.", ['Plastid', 'Apicoplast']],
      ["OG Plastid; Chloroplast.", ['Plastid', 'Chloroplast']],
      ["OG Plastid; Cyanelle.", ['Plastid', 'Cyanelle']],
      ["OG Plastid; Non-photosynthetic plastid.", ['Plastid', 'Non-photosynthetic plastid']]
    ]
    cases.each do |og_line, organelles|
      entry = SPTR.new(og_line)
      assert_equal(organelles, entry.og)
    end
  end
end
1558
+
1559
+
1560
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_1
1561
class TestSPTRUniProtRel6_1 < Test::Unit::TestCase
  # Annotation changes concerning the feature key METAL.
  # The old style names both metals on one FT line; the new style uses one
  # FT line per metal.
  def test_FT_metal
    old_style = "FT METAL 61 61 Copper and zinc."
    entry = SPTR.new(old_style)
    assert_equal([metal_feature('Copper and zinc.')], entry.ft['METAL'])

    new_style = ["FT METAL 61 61 Copper.",
                 "FT METAL 61 61 Zinc."].join("\n")
    entry = SPTR.new(new_style)
    assert_equal([metal_feature("Copper."), metal_feature("Zinc.")],
                 entry.ft['METAL'])
  end

  private

  # Expected METAL feature at position 61 with the given description;
  # FTId and diff are empty for every fixture in this class.
  def metal_feature(description)
    {'From' => 61,
     'To' => 61,
     'Description' => description,
     'FTId' => '',
     'diff' => [],
     'original' => ["METAL", "61", "61", description, ""]}
  end
end
1592
+
1593
+
1594
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_5
1595
class TestSPTRUniProtRel6_5 < Test::Unit::TestCase
  # Changes in the keywlist.txt file
  # * Modification of the HI line format:
  def test_HI_line
    # HI Category: Keyword_1; ...; Keyword_n; Described_Keyword.
    # The first term listed in an HI line is a category. It is followed by
    # a hierarchical list of keywords of that category and ends with the
    # described keyword. There can be more than one HI line of the same
    # category in one keyword entry.
    flat_file = [
      "HI Molecular function: Ionic channel; Calcium channel.",
      "HI Biological process: Transport; Ion transport; Calcium transport; Calcium channel.",
      "HI Ligand: Calcium; Calcium channel."
    ].join("\n")
    expected = [
      {'Category' => 'Molecular function',
       'Keywords' => ['Ionic channel'],
       'Keyword' => 'Calcium channel'},
      {'Category' => 'Biological process',
       'Keywords' => ['Transport', 'Ion transport', 'Calcium transport'],
       'Keyword' => 'Calcium channel'},
      {'Category' => 'Ligand',
       'Keywords' => ['Calcium'],
       'Keyword' => 'Calcium channel'}
    ]
    entry = SPTR.new(flat_file)
    assert_equal(expected, entry.hi)
  end
end
1617
+
1618
+
1619
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.0
1620
+ class TestSPTRUniProtRel7_0 < Test::Unit::TestCase
1621
+ # Changes concerning dates and versions numbers (DT lines)
1622
+ def test_DT_line # Checks that SPTR#dt maps the three DT lines to the "created", "sequence" and "annotation" keys, for a Swiss-Prot and then a TrEMBL fixture.
1623
+ up_sp_data = "DT 01-JAN-1998, integrated into UniProtKB/Swiss-Prot.
1624
+ DT 15-OCT-2001, sequence version 3.
1625
+ DT 01-APR-2004, entry version 14."
1626
+ sp = SPTR.new(up_sp_data)
1627
+ assert_equal({"sequence" => "15-OCT-2001, sequence version 3.", # second DT line
1628
+ "annotation" => "01-APR-2004, entry version 14.", # third DT line
1629
+ "created" => "01-JAN-1998, integrated into UniProtKB/Swiss-Prot."}, # first DT line
1630
+ sp.dt)
1631
+
1632
+ up_tr_data = "DT 01-FEB-1999, integrated into UniProtKB/TrEMBL.
1633
+ DT 15-OCT-2000, sequence version 2.
1634
+ DT 15-DEC-2004, entry version 5."
1635
+ sp = SPTR.new(up_tr_data) # same check against the TrEMBL wording of the first DT line
1636
+ assert_equal({"sequence" => "15-OCT-2000, sequence version 2.",
1637
+ "annotation" => "15-DEC-2004, entry version 5.",
1638
+ "created" => "01-FEB-1999, integrated into UniProtKB/TrEMBL."},
1639
+ sp.dt)
1640
+ end
1641
+
1642
+ # Addition of a feature (FT) key CHAIN over the whole sequence length
1643
+
1644
+ # Changes concerning the copyright statement
1645
+ def test_CC_copyright_statement # Checks that CC lines holding only the copyright banner produce an empty hash from SPTR#cc.
1646
+ data = "CC -----------------------------------------------------------------------
1647
+ CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
1648
+ CC Distributed under the Creative Commons Attribution-NoDerivs License
1649
+ CC -----------------------------------------------------------------------"
1650
+ sp = SPTR.new(data)
1651
+ assert_equal({}, sp.cc) # the banner carries no comment topic, so nothing is expected in the result
1652
+ end
1653
+ end
1654
+
1655
+
1656
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.6
1657
+ class TestSPTRUniProtRel7_6 < Test::Unit::TestCase
1658
+ # Sequences with over 10000 amino acids in UniProtKB/Swiss-Prot
1659
+ def test_10000aa
1660
+ entry_id = 'Q09165'
1661
+ data = ["SQ SEQUENCE 393 AA; 43653 MW; AD5C149FD8106131 CRC64;\n",
1662
+ " MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP\n" * 200,
1663
+ "//\n"].join
1664
+ sp = SPTR.new(data)
1665
+ assert(12000, sp.seq.size)
1666
+ end
1667
+ end
1668
+
1669
+
1670
+ # Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel8.0
1671
+ class TestSPTRUniProtRel8_0 < Test::Unit::TestCase
1672
+ # Replacement of the feature key VARSPLIC by VAR_SEQ
1673
+ def test_FT_VER_SEQ # Checks SPTR#ft('VAR_SEQ') on a two-line VAR_SEQ feature. NOTE(review): the method name says VER_SEQ although the key under test is VAR_SEQ.
1674
+ data = "FT VAR_SEQ 1 34 Missing (in isoform 3).
1675
+ FT /FTId=VSP_004099."
1676
+ sp = SPTR.new(data)
1677
+ res = [{'From' => 1, # expected: integer range, description, FTId without the trailing period, raw fields under 'original'
1678
+ 'To' => 34,
1679
+ 'Description' => 'Missing (in isoform 3).',
1680
+ 'diff' => ['', nil],
1681
+ 'FTId' => 'VSP_004099',
1682
+ 'original' => ["VAR_SEQ", "1", "34", "Missing (in isoform 3).",
1683
+ "/FTId=VSP_004099."]}]
1684
+ assert_equal(res, sp.ft('VAR_SEQ'))
1685
+ end
1686
+
1687
+
1688
+ # Syntax modification of the comment line (CC) topic ALTERNATIVE PRODUCTS
1689
+ def test_CC_alternative_products # Checks SPTR#cc("ALTERNATIVE PRODUCTS") on a three-isoform entry, then SPTR#ft("VAR_SEQ") on the FT lines of the same entry.
1690
+ # CC -!- ALTERNATIVE PRODUCTS:
1691
+ # CC Event=Event(, Event)*; Named isoforms=Number_of_isoforms;
1692
+ # (CC Comment=Free_text;)?
1693
+ # (CC Name=Isoform_name;( Synonyms=Synonym(, Synonym)*;)?
1694
+ # CC IsoId=Isoform_identifier(, Isoform_identifier)*;
1695
+ # CC Sequence=(Displayed|External|Not described|Feature_identifier(, Feature_identifier)*);
1696
+ # (CC Note=Free_text;)?)+
1697
+ # Note: Variable values are represented in italics. Perl-style multipliers indicate whether a pattern (as delimited by parentheses) is optional (?), may occur 0 or more times (*), or 1 or more times (+). Alternative values are separated by a pipe symbol (|).
1698
+
1699
+ data = "CC -!- ALTERNATIVE PRODUCTS:
1700
+ CC Event=Alternative splicing, Alternative initiation; Named isoforms=3;
1701
+ CC Comment=Isoform 1 and isoform 2 arise due to the use of two
1702
+ CC alternative first exons joined to a common exon 2 at the same
1703
+ CC acceptor site but in different reading frames, resulting in two
1704
+ CC completely different isoforms;
1705
+ CC Name=1; Synonyms=p16INK4a;
1706
+ CC IsoId=O77617-1; Sequence=Displayed;
1707
+ CC Name=3;
1708
+ CC IsoId=O77617-2; Sequence=VSP_004099;
1709
+ CC Note=Produced by alternative initiation at Met-35 of isoform 1;
1710
+ CC Name=2; Synonyms=p19ARF;
1711
+ CC IsoId=O77618-1; Sequence=External;
1712
+ FT VAR_SEQ 1 34 Missing (in isoform 3).
1713
+ FT /FTId=VSP_004099."
1714
+ sp = SPTR.new(data)
1715
+ assert_equal({"Comment" => "Isoform 1 and isoform 2 arise due to the use of two alternative first exons joined to a common exon 2 at the same acceptor site but in different reading frames, resulting in two completely different isoforms",
1716
+ "Named isoforms" => "3", # kept as a string, not converted to an integer
1717
+ "Variants" => [{"IsoId" => ["O77617-1"], # one hash per Name= group, in input order
1718
+ "Name" => "1",
1719
+ "Synonyms" => ["p16INK4a"],
1720
+ "Sequence" => ["Displayed"]},
1721
+ {"IsoId" => ["O77617-2"],
1722
+ "Name" => "3",
1723
+ "Synonyms" => [], # isoform 3 has no Synonyms= field in the fixture
1724
+ "Sequence" => ["VSP_004099"]},
1725
+ {"IsoId" => ["O77618-1"],
1726
+ "Name" => "2",
1727
+ "Synonyms" => ["p19ARF"],
1728
+ "Sequence" => ["External"]}],
1729
+ "Event" => ["Alternative splicing", "Alternative initiation"]},
1730
+ sp.cc("ALTERNATIVE PRODUCTS"))
1731
+ assert_equal([{"From" => 1, # the VAR_SEQ feature whose FTId is named by isoform 3's Sequence= field
1732
+ "To" => 34,
1733
+ "Description"=>"Missing (in isoform 3).",
1734
+ "FTId" => "VSP_004099",
1735
+ "diff" => ["", nil],
1736
+ "original"=> ["VAR_SEQ", "1", "34",
1737
+ "Missing (in isoform 3).", "/FTId=VSP_004099."]}],
1738
+ sp.ft("VAR_SEQ"))
1739
+ end
1740
+
1741
+
1742
+ # Replacement of the comment line (CC) topic DATABASE by WEB RESOURCE
1743
+ def test_CC_web_resource # Placeholder: records the DATABASE -> WEB RESOURCE syntax change only; no parser call is made.
1744
+ # CC -!- DATABASE: NAME=ResourceName[; NOTE=FreeText][; WWW=WWWAddress][; FTP=FTPAddress].
1745
+ # CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText]; URL=WWWAddress.
1746
+ # The length of these lines may exceed 75 characters because long URL addresses are not wrapped into multiple lines.
1747
+ assert(true) # always passes; TODO: replace with a real check of a WEB RESOURCE comment
1748
+ end
1749
+
1750
+ # Introduction of the new line type OH (Organism Host) for viral hosts
1751
+ def test_OH_lines # Checks that SPTR#oh returns one hash per OH line, in input order, with 'NCBI_TaxID' and 'HostName' keys.
1752
+ data = 'OS Tomato black ring virus (strain E) (TBRV).
1753
+ OC Viruses; ssRNA positive-strand viruses, no DNA stage; Comoviridae;
1754
+ OC Nepovirus; Subgroup B.
1755
+ OX NCBI_TaxID=12277;
1756
+ OH NCBI_TaxID=4681; Allium porrum (Leek).
1757
+ OH NCBI_TaxID=4045; Apium graveolens (Celery).
1758
+ OH NCBI_TaxID=161934; Beta vulgaris (Sugar beet).
1759
+ OH NCBI_TaxID=38871; Fraxinus (ash trees).
1760
+ OH NCBI_TaxID=4236; Lactuca sativa (Garden lettuce).
1761
+ OH NCBI_TaxID=4081; Lycopersicon esculentum (Tomato).
1762
+ OH NCBI_TaxID=39639; Narcissus pseudonarcissus (Daffodil).
1763
+ OH NCBI_TaxID=3885; Phaseolus vulgaris (Kidney bean) (French bean).
1764
+ OH NCBI_TaxID=35938; Robinia pseudoacacia (Black locust).
1765
+ OH NCBI_TaxID=23216; Rubus (bramble).
1766
+ OH NCBI_TaxID=4113; Solanum tuberosum (Potato).
1767
+ OH NCBI_TaxID=13305; Tulipa.
1768
+ OH NCBI_TaxID=3603; Vitis.'
1769
+
1770
+ res = [{'NCBI_TaxID' => '4681', 'HostName' => 'Allium porrum (Leek)'}, # taxonomy ids stay strings; host names lose the trailing period
1771
+ {'NCBI_TaxID' => '4045', 'HostName' => 'Apium graveolens (Celery)'},
1772
+ {'NCBI_TaxID' => '161934', 'HostName' => 'Beta vulgaris (Sugar beet)'},
1773
+ {'NCBI_TaxID' => '38871', 'HostName' => 'Fraxinus (ash trees)'},
1774
+ {'NCBI_TaxID' => '4236', 'HostName' => 'Lactuca sativa (Garden lettuce)'},
1775
+ {'NCBI_TaxID' => '4081', 'HostName' => 'Lycopersicon esculentum (Tomato)'},
1776
+ {'NCBI_TaxID' => '39639', 'HostName' => 'Narcissus pseudonarcissus (Daffodil)'},
1777
+ {'NCBI_TaxID' => '3885',
1778
+ 'HostName' => 'Phaseolus vulgaris (Kidney bean) (French bean)'}, # both parenthesised common names are kept
1779
+ {'NCBI_TaxID' => '35938', 'HostName' => 'Robinia pseudoacacia (Black locust)'},
1780
+ {'NCBI_TaxID' => '23216', 'HostName' => 'Rubus (bramble)'},
1781
+ {'NCBI_TaxID' => '4113', 'HostName' => 'Solanum tuberosum (Potato)'},
1782
+ {'NCBI_TaxID' => '13305', 'HostName' => 'Tulipa'},
1783
+ {'NCBI_TaxID' => '3603', 'HostName' => 'Vitis'}]
1784
+ sp = SPTR.new(data)
1785
+ assert_equal(res, sp.oh)
1786
+ end
1787
+
1788
+ def test_OH_line_exception # Checks that SPTR#oh raises ArgumentError on a malformed OH line (here the taxonomy id is followed by 'x:' instead of ';').
1789
+ data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
1790
+ OH NCBI_TaxID=23216x: Rubus (bramble)."
1791
+ sp = SPTR.new(data)
1792
+ assert_raise(ArgumentError) { sp.oh } # the error is expected from the #oh call inside the block, after the entry object was already built
1793
+ end
1794
+
1795
+ end
1796
+
1797
+ class TestOSLine < Test::Unit::TestCase
1798
+ def test_uncapitalized_letter_Q32725_9POAL # Checks that an OS line whose organism name starts with a lowercase letter comes back unchanged.
1799
+ data = "OS unknown cyperaceous sp.\n"
1800
+ sp = SPTR.new(data)
1801
+ assert_equal('unknown cyperaceous sp.', sp.os.first['os']) # the name is read from the 'os' key of the first element of SPTR#os
1802
+ end
1803
+
1804
+ def test_period_trancation_O63147 # Checks that an organism name with an inner "sp." is not cut at that period. NOTE(review): "trancation" in the name is presumably a typo for "truncation".
1805
+ data = "OS Hippotis sp. Clark and Watts 825.\n"
1806
+ sp = SPTR.new(data)
1807
+ assert_equal('Hippotis sp. Clark and Watts 825.', sp.os.first['os']) # whole name expected, final period included
1808
+ end
1809
+ end
1810
+
1811
+ end # module Bio
1812
+