bio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +14 -122
- data/bin/br_biofetch.rb +2 -2
- data/bin/br_bioflat.rb +2 -2
- data/bin/br_biogetseq.rb +2 -2
- data/bin/br_pmfetch.rb +3 -3
- data/doc/Changes-0.7.rd +77 -0
- data/doc/KEGG_API.rd +523 -232
- data/doc/KEGG_API.rd.ja +529 -207
- data/doc/Tutorial.rd +48 -11
- data/lib/bio.rb +59 -6
- data/lib/bio/alignment.rb +713 -103
- data/lib/bio/appl/bl2seq/report.rb +2 -18
- data/lib/bio/appl/blast.rb +108 -91
- data/lib/bio/appl/blast/format0.rb +33 -18
- data/lib/bio/appl/blast/format8.rb +6 -20
- data/lib/bio/appl/blast/report.rb +293 -429
- data/lib/bio/appl/blast/rexml.rb +8 -22
- data/lib/bio/appl/blast/wublast.rb +21 -12
- data/lib/bio/appl/blast/xmlparser.rb +180 -183
- data/lib/bio/appl/blat/report.rb +127 -30
- data/lib/bio/appl/clustalw.rb +87 -59
- data/lib/bio/appl/clustalw/report.rb +20 -22
- data/lib/bio/appl/emboss.rb +113 -20
- data/lib/bio/appl/fasta.rb +173 -198
- data/lib/bio/appl/fasta/format10.rb +244 -347
- data/lib/bio/appl/gcg/msf.rb +212 -0
- data/lib/bio/appl/gcg/seq.rb +195 -0
- data/lib/bio/appl/genscan/report.rb +5 -23
- data/lib/bio/appl/hmmer.rb +8 -45
- data/lib/bio/appl/hmmer/report.rb +2 -20
- data/lib/bio/appl/iprscan/report.rb +374 -0
- data/lib/bio/appl/mafft.rb +87 -50
- data/lib/bio/appl/mafft/report.rb +151 -44
- data/lib/bio/appl/muscle.rb +52 -0
- data/lib/bio/appl/phylip/alignment.rb +129 -0
- data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
- data/lib/bio/appl/probcons.rb +41 -0
- data/lib/bio/appl/psort.rb +89 -96
- data/lib/bio/appl/psort/report.rb +6 -22
- data/lib/bio/appl/pts1.rb +263 -0
- data/lib/bio/appl/sim4.rb +26 -36
- data/lib/bio/appl/sim4/report.rb +2 -18
- data/lib/bio/appl/sosui/report.rb +5 -20
- data/lib/bio/appl/spidey/report.rb +2 -2
- data/lib/bio/appl/targetp/report.rb +4 -20
- data/lib/bio/appl/tcoffee.rb +55 -0
- data/lib/bio/appl/tmhmm/report.rb +4 -20
- data/lib/bio/command.rb +235 -64
- data/lib/bio/data/aa.rb +21 -26
- data/lib/bio/data/codontable.rb +2 -20
- data/lib/bio/data/na.rb +19 -4
- data/lib/bio/db.rb +27 -12
- data/lib/bio/db/aaindex.rb +2 -20
- data/lib/bio/db/embl/common.rb +4 -21
- data/lib/bio/db/embl/embl.rb +33 -85
- data/lib/bio/db/embl/sptr.rb +612 -302
- data/lib/bio/db/embl/swissprot.rb +10 -29
- data/lib/bio/db/embl/trembl.rb +10 -29
- data/lib/bio/db/embl/uniprot.rb +10 -29
- data/lib/bio/db/fantom.rb +15 -20
- data/lib/bio/db/fasta.rb +3 -3
- data/lib/bio/db/genbank/common.rb +37 -46
- data/lib/bio/db/genbank/ddbj.rb +6 -18
- data/lib/bio/db/genbank/genbank.rb +47 -186
- data/lib/bio/db/genbank/genpept.rb +4 -17
- data/lib/bio/db/genbank/refseq.rb +4 -17
- data/lib/bio/db/gff.rb +103 -35
- data/lib/bio/db/go.rb +4 -20
- data/lib/bio/db/kegg/brite.rb +26 -36
- data/lib/bio/db/kegg/compound.rb +81 -85
- data/lib/bio/db/kegg/drug.rb +98 -0
- data/lib/bio/db/kegg/enzyme.rb +133 -110
- data/lib/bio/db/kegg/expression.rb +2 -20
- data/lib/bio/db/kegg/genes.rb +208 -238
- data/lib/bio/db/kegg/genome.rb +164 -285
- data/lib/bio/db/kegg/glycan.rb +114 -157
- data/lib/bio/db/kegg/keggtab.rb +242 -303
- data/lib/bio/db/kegg/kgml.rb +117 -160
- data/lib/bio/db/kegg/orthology.rb +112 -0
- data/lib/bio/db/kegg/reaction.rb +54 -69
- data/lib/bio/db/kegg/taxonomy.rb +331 -0
- data/lib/bio/db/lasergene.rb +209 -0
- data/lib/bio/db/litdb.rb +3 -27
- data/lib/bio/db/medline.rb +228 -249
- data/lib/bio/db/nbrf.rb +3 -3
- data/lib/bio/db/newick.rb +510 -0
- data/lib/bio/db/nexus.rb +1854 -0
- data/lib/bio/db/pdb.rb +5 -17
- data/lib/bio/db/pdb/atom.rb +2 -18
- data/lib/bio/db/pdb/chain.rb +2 -18
- data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
- data/lib/bio/db/pdb/model.rb +2 -18
- data/lib/bio/db/pdb/pdb.rb +73 -34
- data/lib/bio/db/pdb/residue.rb +4 -20
- data/lib/bio/db/pdb/utils.rb +2 -18
- data/lib/bio/db/prosite.rb +403 -422
- data/lib/bio/db/rebase.rb +84 -40
- data/lib/bio/db/soft.rb +404 -0
- data/lib/bio/db/transfac.rb +5 -17
- data/lib/bio/feature.rb +106 -52
- data/lib/bio/io/das.rb +32 -42
- data/lib/bio/io/dbget.rb +2 -20
- data/lib/bio/io/ddbjxml.rb +77 -138
- data/lib/bio/io/ebisoap.rb +158 -0
- data/lib/bio/io/ensembl.rb +229 -0
- data/lib/bio/io/fastacmd.rb +89 -82
- data/lib/bio/io/fetch.rb +163 -96
- data/lib/bio/io/flatfile.rb +170 -73
- data/lib/bio/io/flatfile/bdb.rb +3 -16
- data/lib/bio/io/flatfile/index.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +3 -2
- data/lib/bio/io/higet.rb +12 -31
- data/lib/bio/io/keggapi.rb +210 -269
- data/lib/bio/io/ncbisoap.rb +155 -0
- data/lib/bio/io/pubmed.rb +169 -147
- data/lib/bio/io/registry.rb +4 -20
- data/lib/bio/io/soapwsdl.rb +43 -38
- data/lib/bio/io/sql.rb +242 -305
- data/lib/bio/location.rb +407 -285
- data/lib/bio/map.rb +410 -0
- data/lib/bio/pathway.rb +558 -695
- data/lib/bio/reference.rb +272 -75
- data/lib/bio/sequence.rb +255 -13
- data/lib/bio/sequence/aa.rb +71 -10
- data/lib/bio/sequence/common.rb +187 -33
- data/lib/bio/sequence/compat.rb +59 -4
- data/lib/bio/sequence/format.rb +54 -7
- data/lib/bio/sequence/generic.rb +3 -3
- data/lib/bio/sequence/na.rb +328 -26
- data/lib/bio/shell.rb +11 -4
- data/lib/bio/shell/core.rb +221 -160
- data/lib/bio/shell/demo.rb +18 -15
- data/lib/bio/shell/interface.rb +14 -12
- data/lib/bio/shell/irb.rb +95 -0
- data/lib/bio/shell/object.rb +45 -26
- data/lib/bio/shell/plugin/blast.rb +42 -0
- data/lib/bio/shell/plugin/codon.rb +22 -14
- data/lib/bio/shell/plugin/das.rb +58 -0
- data/lib/bio/shell/plugin/emboss.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +22 -11
- data/lib/bio/shell/plugin/flatfile.rb +2 -2
- data/lib/bio/shell/plugin/keggapi.rb +13 -6
- data/lib/bio/shell/plugin/midi.rb +4 -4
- data/lib/bio/shell/plugin/obda.rb +2 -2
- data/lib/bio/shell/plugin/psort.rb +56 -0
- data/lib/bio/shell/plugin/seq.rb +35 -8
- data/lib/bio/shell/plugin/soap.rb +87 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
- data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
- data/lib/bio/shell/script.rb +25 -0
- data/lib/bio/shell/setup.rb +109 -0
- data/lib/bio/shell/web.rb +70 -58
- data/lib/bio/tree.rb +850 -0
- data/lib/bio/util/color_scheme.rb +84 -107
- data/lib/bio/util/color_scheme/buried.rb +5 -24
- data/lib/bio/util/color_scheme/helix.rb +5 -24
- data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
- data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
- data/lib/bio/util/color_scheme/strand.rb +5 -24
- data/lib/bio/util/color_scheme/taylor.rb +5 -24
- data/lib/bio/util/color_scheme/turn.rb +5 -24
- data/lib/bio/util/color_scheme/zappo.rb +5 -24
- data/lib/bio/util/contingency_table.rb +70 -43
- data/lib/bio/util/restriction_enzyme.rb +228 -0
- data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
- data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
- data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
- data/lib/bio/util/sirna.rb +4 -22
- data/sample/color_scheme_na.rb +4 -12
- data/sample/enzymes.rb +78 -0
- data/sample/goslim.rb +5 -13
- data/sample/psortplot_html.rb +4 -12
- data/test/data/blast/2.2.15.blastp.m7 +876 -0
- data/test/data/embl/AB090716.embl.rel89 +63 -0
- data/test/data/fasta/example1.txt +75 -0
- data/test/data/fasta/example2.txt +21 -0
- data/test/data/iprscan/merged.raw +32 -0
- data/test/data/iprscan/merged.txt +74 -0
- data/test/data/soft/GDS100_partial.soft +92 -0
- data/test/data/soft/GSE3457_family_partial.soft +874 -0
- data/test/functional/bio/io/test_ensembl.rb +103 -0
- data/test/functional/bio/io/test_soapwsdl.rb +5 -17
- data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
- data/test/unit/bio/appl/blast/test_report.rb +3 -16
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
- data/test/unit/bio/appl/genscan/test_report.rb +3 -16
- data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
- data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
- data/test/unit/bio/appl/mafft/test_report.rb +63 -0
- data/test/unit/bio/appl/sosui/test_report.rb +3 -16
- data/test/unit/bio/appl/targetp/test_report.rb +3 -16
- data/test/unit/bio/appl/test_blast.rb +3 -16
- data/test/unit/bio/appl/test_fasta.rb +4 -16
- data/test/unit/bio/appl/test_pts1.rb +140 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
- data/test/unit/bio/data/test_aa.rb +4 -17
- data/test/unit/bio/data/test_codontable.rb +3 -16
- data/test/unit/bio/data/test_na.rb +3 -3
- data/test/unit/bio/db/embl/test_common.rb +3 -16
- data/test/unit/bio/db/embl/test_embl.rb +3 -16
- data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
- data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
- data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
- data/test/unit/bio/db/kegg/test_genes.rb +3 -16
- data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
- data/test/unit/bio/db/test_aaindex.rb +2 -2
- data/test/unit/bio/db/test_fasta.rb +3 -16
- data/test/unit/bio/db/test_gff.rb +3 -16
- data/test/unit/bio/db/test_lasergene.rb +95 -0
- data/test/unit/bio/db/test_newick.rb +56 -0
- data/test/unit/bio/db/test_nexus.rb +360 -0
- data/test/unit/bio/db/test_prosite.rb +5 -18
- data/test/unit/bio/db/test_rebase.rb +11 -25
- data/test/unit/bio/db/test_soft.rb +138 -0
- data/test/unit/bio/io/test_ddbjxml.rb +5 -17
- data/test/unit/bio/io/test_ensembl.rb +109 -0
- data/test/unit/bio/io/test_fastacmd.rb +3 -16
- data/test/unit/bio/io/test_flatfile.rb +237 -0
- data/test/unit/bio/io/test_soapwsdl.rb +4 -17
- data/test/unit/bio/sequence/test_aa.rb +3 -3
- data/test/unit/bio/sequence/test_common.rb +3 -16
- data/test/unit/bio/sequence/test_compat.rb +3 -16
- data/test/unit/bio/sequence/test_na.rb +29 -3
- data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
- data/test/unit/bio/test_alignment.rb +16 -27
- data/test/unit/bio/test_command.rb +242 -25
- data/test/unit/bio/test_db.rb +3 -16
- data/test/unit/bio/test_feature.rb +4 -16
- data/test/unit/bio/test_location.rb +4 -16
- data/test/unit/bio/test_map.rb +230 -0
- data/test/unit/bio/test_pathway.rb +4 -16
- data/test/unit/bio/test_reference.rb +2 -2
- data/test/unit/bio/test_sequence.rb +7 -19
- data/test/unit/bio/test_shell.rb +3 -16
- data/test/unit/bio/test_tree.rb +593 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
- data/test/unit/bio/util/test_color_scheme.rb +6 -18
- data/test/unit/bio/util/test_contingency_table.rb +6 -18
- data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
- data/test/unit/bio/util/test_sirna.rb +3 -16
- metadata +228 -169
- data/doc/BioRuby.rd.ja +0 -225
- data/doc/Design.rd.ja +0 -341
- data/doc/TODO.rd.ja +0 -138
- data/lib/bio/appl/fasta/format6.rb +0 -37
- data/lib/bio/db/kegg/cell.rb +0 -88
- data/lib/bio/db/kegg/ko.rb +0 -178
- data/lib/bio/shell/rails/Rakefile +0 -10
- data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
- data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
- data/lib/bio/shell/rails/config/boot.rb +0 -19
- data/lib/bio/shell/rails/config/database.yml +0 -85
- data/lib/bio/shell/rails/config/environment.rb +0 -53
- data/lib/bio/shell/rails/config/environments/development.rb +0 -19
- data/lib/bio/shell/rails/config/environments/production.rb +0 -19
- data/lib/bio/shell/rails/config/environments/test.rb +0 -19
- data/lib/bio/shell/rails/config/routes.rb +0 -19
- data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
- data/lib/bio/shell/rails/public/404.html +0 -8
- data/lib/bio/shell/rails/public/500.html +0 -8
- data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
- data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
- data/lib/bio/shell/rails/public/dispatch.rb +0 -10
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +0 -277
- data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
- data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
- data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
- data/lib/bio/shell/rails/public/robots.txt +0 -1
- data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
- data/lib/bio/shell/rails/script/about +0 -3
- data/lib/bio/shell/rails/script/breakpointer +0 -3
- data/lib/bio/shell/rails/script/console +0 -3
- data/lib/bio/shell/rails/script/destroy +0 -3
- data/lib/bio/shell/rails/script/generate +0 -3
- data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
- data/lib/bio/shell/rails/script/performance/profiler +0 -3
- data/lib/bio/shell/rails/script/plugin +0 -3
- data/lib/bio/shell/rails/script/process/reaper +0 -3
- data/lib/bio/shell/rails/script/process/spawner +0 -3
- data/lib/bio/shell/rails/script/process/spinner +0 -3
- data/lib/bio/shell/rails/script/runner +0 -3
- data/lib/bio/shell/rails/script/server +0 -42
- data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -0,0 +1,209 @@
|
|
1
|
+
#
|
2
|
+
# bio/db/lasergene.rb - Interface for DNAStar Lasergene sequence file format
|
3
|
+
#
|
4
|
+
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
|
5
|
+
# Copyright:: Copyright (c) 2007 Center for Biomedical Research Informatics, University of Minnesota (http://cbri.umn.edu)
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id: lasergene.rb,v 1.3 2007/04/05 23:35:40 trevor Exp $
|
9
|
+
#
|
10
|
+
|
11
|
+
module Bio #:nodoc:
|
12
|
+
|
13
|
+
#
|
14
|
+
# bio/db/lasergene.rb - Interface for DNAStar Lasergene sequence file format
|
15
|
+
#
|
16
|
+
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
|
17
|
+
# Copyright:: Copyright (c) 2007 Center for Biomedical Research Informatics, University of Minnesota (http://cbri.umn.edu)
|
18
|
+
# License:: The Ruby License
|
19
|
+
#
|
20
|
+
# = Description
|
21
|
+
#
|
22
|
+
# Bio::Lasergene reads DNAStar Lasergene formatted sequence files, or +.seq+
|
23
|
+
# files. It only expects to find one sequence per file.
|
24
|
+
#
|
25
|
+
# = Usage
|
26
|
+
#
|
27
|
+
# require 'bio'
|
28
|
+
# filename = 'MyFile.seq'
|
29
|
+
# lseq = Bio::Lasergene.new( IO.readlines(filename) )
|
30
|
+
# lseq.entry_id # => "Contig 1"
|
31
|
+
# lseq.seq # => ATGACGTATCCAAAGAGGCGTTACC
|
32
|
+
#
|
33
|
+
# = Comments
|
34
|
+
#
|
35
|
+
# I'm only aware of the following three kinds of Lasergene file formats. Feel
|
36
|
+
# free to send me other examples that may not currently be accounted for.
|
37
|
+
#
|
38
|
+
# File format 1:
|
39
|
+
#
|
40
|
+
# ## begin ##
|
41
|
+
# "Contig 1" (1,934)
|
42
|
+
# Contig Length: 934 bases
|
43
|
+
# Average Length/Sequence: 467 bases
|
44
|
+
# Total Sequence Length: 1869 bases
|
45
|
+
# Top Strand: 2 sequences
|
46
|
+
# Bottom Strand: 2 sequences
|
47
|
+
# Total: 4 sequences
|
48
|
+
# ^^
|
49
|
+
# ATGACGTATCCAAAGAGGCGTTACCGGAGAAGAAGACACCGCCCCCGCAGTCCTCTTGGCCAGATCCTCCGCCGCCGCCCCTGGCTCGTCCACCCCCGCCACAGTTACCGCTGGAGAAGGAAAAATGGCATCTTCAWCACCCGCCTATCCCGCAYCTTCGGAWRTACTATCAAGCGAACCACAGTCAGAACGCCCTCCTGGGCGGTGGACATGATGAGATTCAATATTAATGACTTTCTTCCCCCAGGAGGGGGCTCAAACCCCCGCTCTGTGCCCTTTGAATACTACAGAATAAGAAAGGTTAAGGTTGAATTCTGGCCCTGCTCCCCGATCACCCAGGGTGACAGGGGAATGGGCTCCAGTGCTGWTATTCTAGMTGATRRCTTKGTAACAAAGRCCACAGCCCTCACCTATGACCCCTATGTAAACTTCTCCTCCCGCCATACCATAACCCAGCCCTTCTCCTACCRCTCCCGYTACTTTACCCCCAAACCTGTCCTWGATKCCACTATKGATKACTKCCAACCAAACAACAAAAGAAACCAGCTGTGGSTGAGACTACAWACTGCTGGAAATGTAGACCWCGTAGGCCTSGGCACTGCGTKCGAAAACAGTATATACGACCAGGAATACAATATCCGTGTMACCATGTATGTACAATTCAGAGAATTTAATCTTAAAGACCCCCCRCTTMACCCKTAATGAATAATAAMAACCATTACGAAGTGATAAAAWAGWCTCAGTAATTTATTYCATATGGAAATTCWSGGCATGGGGGGGAAAGGGTGACGAACKKGCCCCCTTCCTCCSTSGMYTKTTCYGTAGCATTCYTCCAMAAYACCWAGGCAGYAMTCCTCCSATCAAGAGcYTSYACAGCTGGGACAGCAGTTGAGGAGGACCATTCAAAGGGGGTCGGATTGCTGGTAATCAGA
|
50
|
+
# ## end ##
|
51
|
+
#
|
52
|
+
#
|
53
|
+
# File format 2:
|
54
|
+
#
|
55
|
+
# ## begin ##
|
56
|
+
# ^^: 350,935
|
57
|
+
# Contig 1 (1,935)
|
58
|
+
# Contig Length: 935 bases
|
59
|
+
# Average Length/Sequence: 580 bases
|
60
|
+
# Total Sequence Length: 2323 bases
|
61
|
+
# Top Strand: 2 sequences
|
62
|
+
# Bottom Strand: 2 sequences
|
63
|
+
# Total: 4 sequences
|
64
|
+
# ^^
|
65
|
+
# ATGTCGGGGAAATGCTTGACCGCGGGCTACTGCTCATCATTGCTTTCTTTGTGGTATATCGTGCCGTTCTGTTTTGCTGTGCTCGTCAACGCCAGCGGCGACAGCAGCTCTCATTTTCAGTCGATTTATAACTTGACGTTATGTGAGCTGAATGGCACGAACTGGCTGGCAGACAACTTTAACTGGGCTGTGGAGACTTTTGTCATCTTCCCCGTGTTGACTCACATTGTTTCCTATGGTGCACTCACTACCAGTCATTTTCTTGACACAGTTGGTCTAGTTACTGTGTCTACCGCCGGGTTTTATCACGGGCGGTACGTCTTGAGTAGCATCTACGCGGTCTGTGCTCTGGCTGCGTTGATTTGCTTCGCCATCAGGTTTGCGAAGAACTGCATGTCCTGGCGCTACTCTTGCACTAGATACACCAACTTCCTCCTGGACACCAAGGGCAGACTCTATCGTTGGCGGTCGCCTGTCATCATAGAGAAAGGGGGTAAGGTTGAGGTCGAAGGTCATCTGATCGATCTCAAAAGAGTTGTGCTTGATGGCTCTGTGGCGACACCTTTAACCAGAGTTTCAGCGGAACAATGGGGTCGTCCCTAGACGACTTTTGCCATGATAGTACAGCCCCACAGAAGGTGCTCTTGGCGTTTTCCATCACCTACACGCCAGTGATGATATATGCCCTAAAGGTAAGCCGCGGCCGACTTTTGGGGCTTCTGCACCTTTTGATTTTTTTGAACTGTGCCTTTACTTTCGGGTACATGACATTCGTGCACTTTCGGAGCACGAACAAGGTCGCGCTCACTATGGGAGCAGTAGTCGCACTCCTTTGGGGGGTGTACTCAGCCATAGAAACCTGGAAATTCATCACCTCCAGATGCCGTTGTGCTTGCTAGGCCGCAAGTACATTCTGGCCCCTGCCCACCACGTTG
|
66
|
+
# ## end ##
|
67
|
+
#
|
68
|
+
# File format 3 (non-standard Lasergene header):
|
69
|
+
#
|
70
|
+
# ## begin ##
|
71
|
+
# LOCUS PRU87392 15411 bp RNA linear VRL 17-NOV-2000
|
72
|
+
# DEFINITION Porcine reproductive and respiratory syndrome virus strain VR-2332,
|
73
|
+
# complete genome.
|
74
|
+
# ACCESSION U87392 AF030244 U00153
|
75
|
+
# VERSION U87392.3 GI:11192298
|
76
|
+
# [...cut...]
|
77
|
+
# 3'UTR 15261..15411
|
78
|
+
# polyA_site 15409
|
79
|
+
# ORIGIN
|
80
|
+
# ^^
|
81
|
+
# atgacgtataggtgttggctctatgccttggcatttgtattgtcaggagctgtgaccattggcacagcccaaaacttgctgcacagaaacacccttctgtgatagcctccttcaggggagcttagggtttgtccctagcaccttgcttccggagttgcactgctttacggtctctccacccctttaaccatgtctgggatacttgatcggtgcacgtgtacccccaatgccagggtgtttatggcggagggccaagtctactgcacacgatgcctcagtgcacggtctctccttcccctgaacctccaagtttctgagctcggggtgctaggcctattctacaggcccgaagagccactccggtggacgttgccacgtgcattccccactgttgagtgctcccccgccggggcctgctggctttctgcaatctttccaatcgcacgaatgaccagtggaaacctgaacttccaacaaagaatggtacgggtcgcagctgagctttacagagccggccagctcacccctgcagtcttgaaggctctacaagtttatgaacggggttgccgctggtaccccattgttggacctgtccctggagtggccgttttcgccaattccctacatgtgagtgataaacctttcccgggagcaactcacgtgttgaccaacctgccgctcccgcagagacccaagcctgaagacttttgcccctttgagtgtgctatggctactgtctatgacattggtcatgacgccgtcatgtatgtggccgaaaggaaagtctcctgggcccctcgtggcggggatgaagtgaaatttgaagctgtccccggggagttgaagttgattgcgaaccggctccgcacctccttcccgccccaccacacagtggacatgtctaagttcgccttcacagcccctgggtgtggtgtttctatgcgggtcgaacgccaacacggctgccttcccgctgacactgtccctgaaggcaactgctggtggagcttgtttgacttgcttccactggaagttcagaacaaagaaattcgccatgctaaccaatttggctaccagaccaagcatggtgtctctggcaagtacctacagcggaggctgca[...cut...]
|
82
|
+
# ## end ##
|
83
|
+
#
|
84
|
+
class Lasergene
|
85
|
+
# Entire header before the sequence
|
86
|
+
attr_reader :comments
|
87
|
+
|
88
|
+
# Sequence
|
89
|
+
#
|
90
|
+
# Bio::Sequence::NA or Bio::Sequence::AA object
|
91
|
+
attr_reader :sequence
|
92
|
+
|
93
|
+
# Name of sequence
|
94
|
+
# * Parsed from standard Lasergene header
|
95
|
+
attr_reader :name
|
96
|
+
|
97
|
+
# Contig length, length of present sequence
|
98
|
+
# * Parsed from standard Lasergene header
|
99
|
+
attr_reader :contig_length
|
100
|
+
|
101
|
+
# Average length per sequence
|
102
|
+
# * Parsed from standard Lasergene header
|
103
|
+
attr_reader :average_length
|
104
|
+
|
105
|
+
# Length of parent sequence
|
106
|
+
# * Parsed from standard Lasergene header
|
107
|
+
attr_reader :total_length
|
108
|
+
|
109
|
+
# Number of top strand sequences
|
110
|
+
# * Parsed from standard Lasergene header
|
111
|
+
attr_reader :top_strand_sequences
|
112
|
+
|
113
|
+
# Number of bottom strand sequences
|
114
|
+
# * Parsed from standard Lasergene header
|
115
|
+
attr_reader :bottom_strand_sequences
|
116
|
+
|
117
|
+
# Number of sequences
|
118
|
+
# * Parsed from standard Lasergene header
|
119
|
+
attr_reader :total_sequences
|
120
|
+
|
121
|
+
DELIMITER_1 = '^\^\^:' # Match '^^:' at the beginning of a line
|
122
|
+
DELIMITER_2 = '^\^\^' # Match '^^' at the beginning of a line
|
123
|
+
|
124
|
+
def initialize(lines)
|
125
|
+
process(lines)
|
126
|
+
end
|
127
|
+
|
128
|
+
# Is the comment header recognized as standard Lasergene format?
|
129
|
+
#
|
130
|
+
# ---
|
131
|
+
# *Arguments*
|
132
|
+
# * _none_
|
133
|
+
# *Returns*:: +true+ _or_ +false+
|
134
|
+
def standard_comment?
|
135
|
+
@standard_comment
|
136
|
+
end
|
137
|
+
|
138
|
+
# Sequence
|
139
|
+
#
|
140
|
+
# Bio::Sequence::NA or Bio::Sequence::AA object
|
141
|
+
def seq
|
142
|
+
@sequence
|
143
|
+
end
|
144
|
+
|
145
|
+
# Name of sequence
|
146
|
+
# * Parsed from standard Lasergene header
|
147
|
+
def entry_id
|
148
|
+
@name
|
149
|
+
end
|
150
|
+
|
151
|
+
#########
|
152
|
+
protected
|
153
|
+
#########
|
154
|
+
|
155
|
+
def process(lines)
|
156
|
+
delimiter_1_indices = []
|
157
|
+
delimiter_2_indices = []
|
158
|
+
|
159
|
+
# If the data from the file is passed as one big String instead of
|
160
|
+
# broken into an Array, convert lines to an Array
|
161
|
+
if lines.kind_of? String
|
162
|
+
lines = lines.tr("\r", '').split("\n")
|
163
|
+
end
|
164
|
+
|
165
|
+
lines.each_with_index do |line, index|
|
166
|
+
if line.match DELIMITER_1
|
167
|
+
delimiter_1_indices << index
|
168
|
+
elsif line.match DELIMITER_2
|
169
|
+
delimiter_2_indices << index
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
raise InputError, "More than one delimiter of type '#{DELIMITER_1}'" if delimiter_1_indices.size > 1
|
174
|
+
raise InputError, "More than one delimiter of type '#{DELIMITER_2}'" if delimiter_2_indices.size > 1
|
175
|
+
raise InputError, "No comment to data separator of type '#{DELIMITER_2}'" if delimiter_2_indices.size < 1
|
176
|
+
|
177
|
+
if !delimiter_1_indices.empty?
|
178
|
+
# toss out DELIMITER_1 and anything preceding it
|
179
|
+
@comments = lines[ (delimiter_1_indices[0] + 1) .. (delimiter_2_indices[0] - 1) ]
|
180
|
+
else
|
181
|
+
@comments = lines[ 0 .. (delimiter_2_indices[0] - 1) ]
|
182
|
+
end
|
183
|
+
|
184
|
+
@standard_comment = false
|
185
|
+
if @comments[0] =~ %r{(.+)\s+\(\d+,\d+\)} # if we have a standard Lasergene comment
|
186
|
+
@standard_comment = true
|
187
|
+
@name = $1
|
188
|
+
comments.each do |comment|
|
189
|
+
if comment.match('Contig Length:\s+(\d+)')
|
190
|
+
@contig_length = $1.to_i
|
191
|
+
elsif comment.match('Average Length/Sequence:\s+(\d+)')
|
192
|
+
@average_length = $1.to_i
|
193
|
+
elsif comment.match('Total Sequence Length:\s+(\d+)')
|
194
|
+
@total_length = $1.to_i
|
195
|
+
elsif comment.match('Top Strand:\s+(\d+)')
|
196
|
+
@top_strand_sequences = $1.to_i
|
197
|
+
elsif comment.match('Bottom Strand:\s+(\d+)')
|
198
|
+
@bottom_strand_sequences = $1.to_i
|
199
|
+
elsif comment.match('Total:\s+(\d+)')
|
200
|
+
@total_sequences = $1.to_i
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
@comments = @comments.join('')
|
206
|
+
@sequence = Bio::Sequence.auto( lines[ (delimiter_2_indices[0] + 1) .. -1 ].join('') )
|
207
|
+
end
|
208
|
+
end # Lasergene
|
209
|
+
end # Bio
|
data/lib/bio/db/litdb.rb
CHANGED
@@ -1,34 +1,10 @@
|
|
1
1
|
#
|
2
2
|
# = bio/db/litdb.rb - LITDB database class
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2001
|
5
|
-
# License::
|
4
|
+
# Copyright:: Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
|
5
|
+
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
# == Description
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# == Example
|
13
|
-
# == References
|
14
|
-
#
|
15
|
-
#--
|
16
|
-
#
|
17
|
-
# This library is free software; you can redistribute it and/or
|
18
|
-
# modify it under the terms of the GNU Lesser General Public
|
19
|
-
# License as published by the Free Software Foundation; either
|
20
|
-
# version 2 of the License, or (at your option) any later version.
|
21
|
-
#
|
22
|
-
# This library is distributed in the hope that it will be useful,
|
23
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
24
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
25
|
-
# Lesser General Public License for more details.
|
26
|
-
#
|
27
|
-
# You should have received a copy of the GNU Lesser General Public
|
28
|
-
# License along with this library; if not, write to the Free Software
|
29
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
30
|
-
#
|
31
|
-
#++
|
7
|
+
# $Id: litdb.rb,v 0.10 2007/04/05 23:35:40 trevor Exp $
|
32
8
|
#
|
33
9
|
|
34
10
|
require 'bio/db'
|
data/lib/bio/db/medline.rb
CHANGED
@@ -2,9 +2,16 @@
|
|
2
2
|
# = bio/db/medline.rb - NCBI PubMed/MEDLINE database class
|
3
3
|
#
|
4
4
|
# Copyright:: Copyright (C) 2001, 2005
|
5
|
-
#
|
6
|
-
# License::
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
7
|
#
|
8
|
+
# $Id: medline.rb,v 1.16 2007/04/05 23:35:40 trevor Exp $
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'bio/db'
|
12
|
+
|
13
|
+
module Bio
|
14
|
+
|
8
15
|
# == Description
|
9
16
|
#
|
10
17
|
# NCBI PubMed/MEDLINE database class.
|
@@ -16,323 +23,295 @@
|
|
16
23
|
# medline.pmid == medline.entry_id
|
17
24
|
# medline.mesh
|
18
25
|
#
|
19
|
-
|
20
|
-
#
|
21
|
-
# $Id: medline.rb,v 1.13 2006/02/18 15:03:47 nakao Exp $
|
22
|
-
#
|
23
|
-
#++
|
24
|
-
#
|
25
|
-
# This library is free software; you can redistribute it and/or
|
26
|
-
# modify it under the terms of the GNU Lesser General Public
|
27
|
-
# License as published by the Free Software Foundation; either
|
28
|
-
# version 2 of the License, or (at your option) any later version.
|
29
|
-
#
|
30
|
-
# This library is distributed in the hope that it will be useful,
|
31
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
32
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
33
|
-
# Lesser General Public License for more details.
|
34
|
-
#
|
35
|
-
# You should have received a copy of the GNU Lesser General Public
|
36
|
-
# License along with this library; if not, write to the Free Software
|
37
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
38
|
-
#
|
39
|
-
#--
|
40
|
-
#
|
41
|
-
|
42
|
-
require 'bio/db'
|
43
|
-
|
44
|
-
module Bio
|
45
|
-
|
46
|
-
# NCBI PubMed/MEDLINE database class.
|
47
|
-
class MEDLINE < NCBIDB
|
26
|
+
class MEDLINE < NCBIDB
|
48
27
|
|
49
|
-
|
50
|
-
|
51
|
-
|
28
|
+
#
|
29
|
+
def initialize(entry)
|
30
|
+
@pubmed = Hash.new('')
|
52
31
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
@pubmed[tag] += line[6..-1] if line.length > 6
|
32
|
+
tag = ''
|
33
|
+
entry.each_line do |line|
|
34
|
+
if line =~ /^\w/
|
35
|
+
tag = line[0,4].strip
|
59
36
|
end
|
37
|
+
@pubmed[tag] += line[6..-1] if line.length > 6
|
60
38
|
end
|
39
|
+
end
|
61
40
|
|
62
41
|
|
63
|
-
|
64
|
-
|
65
|
-
|
42
|
+
# Builds a Reference object from the bibliographic fields of this entry.
# Fields whose value is nil or empty are left out of the hash handed to
# Reference.new.
def reference
  fields = Hash.new('')

  [
    ['authors',      authors],
    ['title',        title],
    ['journal',      journal],
    ['volume',       volume],
    ['issue',        issue],
    ['pages',        pages],
    ['year',         year],
    ['pubmed',       pmid],
    ['medline',      ui],
    ['abstract',     abstract],
    ['mesh',         mesh],
    ['affiliations', affiliations]
  ].each { |key, value| fields[key] = value }

  fields.delete_if { |_key, value| value.nil? || value.empty? }

  Reference.new(fields)
end
|
84
63
|
|
85
64
|
|
86
|
-
|
65
|
+
### Common MEDLINE tags
|
87
66
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
67
|
+
# PMID - PubMed Unique Identifier
#   Unique number assigned to each PubMed citation.
#
# Returns the PMID field with surrounding whitespace removed.
def pmid
  raw = @pubmed['PMID']
  raw.strip
end
alias :entry_id :pmid
|
94
73
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
74
|
+
# UI - MEDLINE Unique Identifier
#   Unique number assigned to each MEDLINE citation.
#
# Returns the UI field with surrounding whitespace removed.
def ui
  raw = @pubmed['UI']
  raw.strip
end
|
100
79
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
80
|
+
# TA - Journal Title Abbreviation
#   Standard journal title abbreviation.
#
# Returns the TA field with every run of whitespace collapsed to a single
# space and the ends trimmed.
def ta
  collapsed = @pubmed['TA'].gsub(/\s+/, ' ')
  collapsed.strip
end
alias :journal :ta
|
107
86
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
87
|
+
# VI - Volume
#   Journal volume.
#
# Returns the VI field with surrounding whitespace removed.
def vi
  raw = @pubmed['VI']
  raw.strip
end
alias :volume :vi
|
114
93
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
94
|
+
# IP - Issue
#   The number of the issue, part, or supplement of the journal in which
#   the article was published.
#
# Returns the IP field with surrounding whitespace removed.
def ip
  raw = @pubmed['IP']
  raw.strip
end
alias :issue :ip
|
122
101
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
102
|
+
# PG - Page Number
#   The full pagination of the article.
#
# Returns the PG field exactly as recorded, apart from trimming the
# surrounding whitespace.
def pg
  raw = @pubmed['PG']
  raw.strip
end
|
128
107
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
end
|
136
|
-
pages = "#{from}-#{to}"
|
108
|
+
# Returns the pagination from #pg with an abbreviated end page expanded to
# the same number of characters as the start page, e.g. "1234-45" becomes
# "1234-1245".
#
# The value is returned unchanged when it has no hyphen or when either side
# of the first hyphen is missing (e.g. "123-"), instead of raising
# NoMethodError on a nil end page.
def pages
  pages = pg
  return pages unless pages =~ /-/

  from, to = pages.split('-')
  # split drops trailing empty fields, so "123-" yields only one element.
  return pages if from.nil? || to.nil? || to.empty?

  # Borrow the leading characters of the start page when the end page is
  # shorter ("1234-45": the end page borrows "12").
  missing = from.length - to.length
  to = from[0, missing] + to if missing > 0

  "#{from}-#{to}"
end
|
140
119
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
120
|
+
# DP - Publication Date
#   The date the article was published.
#
# Returns the DP field with surrounding whitespace removed.
def dp
  raw = @pubmed['DP']
  raw.strip
end
alias :date :dp
|
147
126
|
|
148
|
-
|
149
|
-
|
150
|
-
|
127
|
+
# Returns the first four characters of the publication date (#dp).
def year
  published = dp
  published.slice(0, 4)
end
|
151
130
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
131
|
+
# TI - Title Words
#   The title of the article.
#
# Returns the TI field as a single line: runs of whitespace (including the
# line breaks of a wrapped title) become one space, and the ends are trimmed.
def ti
  collapsed = @pubmed['TI'].gsub(/\s+/, ' ')
  collapsed.strip
end
alias :title :ti
|
158
137
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
138
|
+
# AB - Abstract
#   Abstract.
#
# Returns the AB field as a single line: runs of whitespace become one space
# and the ends are trimmed.
def ab
  collapsed = @pubmed['AB'].gsub(/\s+/, ' ')
  collapsed.strip
end
alias :abstract :ab
|
165
144
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
145
|
+
# AU - Author Name
#   Authors' names.
#
# Returns the raw AU field (newline-separated, as accumulated by the parser)
# with surrounding whitespace removed.
def au
  raw = @pubmed['AU']
  raw.strip
end
|
171
150
|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
end
|
181
|
-
if suffix
|
182
|
-
author << " " + suffix
|
183
|
-
end
|
184
|
-
authors.push(author)
|
151
|
+
# Returns the authors listed in #au as an Array of Strings formatted as
# "Family, I. N." with an optional trailing suffix ("Family, I. N. Jr").
#
# Each non-blank line of #au is one author. A name without whitespace is
# passed through unchanged. The last token is taken as the initials unless
# it is a suffix: a suffix is only recognised when the name has at least
# three tokens and the token before it is all upper case (the initials).
# With just two tokens the first is the family name, so an upper-case family
# name such as "LEE J" is no longer mistaken for initials.
def authors
  lines = au.split(/\n/).map { |line| line.strip }
  lines = lines.reject { |line| line.empty? }

  lines.map do |line|
    name = line.split(/\s+/)
    if name.length < 2
      line
    else
      has_suffix = name.length > 2 && name[-2] =~ /\A[A-Z]+\z/
      suffix = has_suffix ? name.pop : nil # Jr etc.
      initial = name.pop.split(//).join('. ')
      author = "#{name.join(' ')}, #{initial}."
      author << " " << suffix if suffix
      author
    end
  end
end
|
188
167
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
168
|
+
# SO - Source
#   Composite field containing bibliographic information.
#
# Returns the SO field with surrounding whitespace removed.
def so
  raw = @pubmed['SO']
  raw.strip
end
alias :source :so
|
195
174
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
175
|
+
# MH - MeSH Terms
#   NLM's controlled vocabulary.
#
# Returns the MH field as an Array with one element per line.
def mh
  terms = @pubmed['MH'].strip
  terms.split(/\n/)
end
alias :mesh :mh
|
202
181
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
182
|
+
# AD - Affiliation
#   Institutional affiliation and address of the first author, and grant
#   numbers.
#
# Returns the AD field as an Array with one element per line.
def ad
  text = @pubmed['AD'].strip
  text.split(/\n/)
end
alias :affiliations :ad
|
210
189
|
|
211
190
|
|
212
|
-
|
191
|
+
### Other MEDLINE tags
|
213
192
|
|
214
|
-
|
215
|
-
|
216
|
-
|
193
|
+
# AID - Article Identifier
|
194
|
+
# Article ID values may include the pii (controlled publisher identifier)
|
195
|
+
# or doi (Digital Object Identifier).
|
217
196
|
|
218
|
-
|
219
|
-
|
197
|
+
# CI - Copyright Information
|
198
|
+
# Copyright statement.
|
220
199
|
|
221
|
-
|
222
|
-
|
200
|
+
# CIN - Comment In
|
201
|
+
# Reference containing a comment about the article.
|
223
202
|
|
224
|
-
|
225
|
-
|
203
|
+
# CN - Collective Name
|
204
|
+
# Corporate author or group names with authorship responsibility.
|
226
205
|
|
227
|
-
|
228
|
-
|
206
|
+
# CON - Comment On
|
207
|
+
# Reference upon which the article comments.
|
229
208
|
|
230
|
-
|
231
|
-
|
209
|
+
# CY - Country
|
210
|
+
# The place of publication of the journal.
|
232
211
|
|
233
|
-
|
234
|
-
|
212
|
+
# DA - Date Created
|
213
|
+
# Used for internal processing at NLM.
|
235
214
|
|
236
|
-
|
237
|
-
|
215
|
+
# DCOM - Date Completed
|
216
|
+
# Used for internal processing at NLM.
|
238
217
|
|
239
|
-
|
240
|
-
|
218
|
+
# DEP - Date of Electronic Publication
|
219
|
+
# Electronic publication date.
|
241
220
|
|
242
|
-
|
243
|
-
|
221
|
+
# EDAT - Entrez Date
|
222
|
+
# The date the citation was added to PubMed.
|
244
223
|
|
245
|
-
|
246
|
-
|
224
|
+
# EIN - Erratum In
|
225
|
+
# Reference containing a published erratum to the article.
|
247
226
|
|
248
|
-
|
249
|
-
|
227
|
+
# GS - Gene Symbol
|
228
|
+
# Abbreviated gene names (used 1991 through 1996).
|
250
229
|
|
251
|
-
|
252
|
-
|
253
|
-
|
230
|
+
# ID - Identification Number
|
231
|
+
# Research grant numbers, contract numbers, or both that designate
|
232
|
+
# financial support by any agency of the US PHS (Public Health Service).
|
254
233
|
|
255
|
-
|
256
|
-
|
234
|
+
# IS - ISSN
|
235
|
+
# International Standard Serial Number of the journal.
|
257
236
|
|
258
|
-
|
259
|
-
|
237
|
+
# JC - Journal Title Code
|
238
|
+
# MEDLINE unique three-character code for the journal.
|
260
239
|
|
261
|
-
|
262
|
-
|
240
|
+
# JID - NLM Unique ID
|
241
|
+
# Unique journal ID in NLM's catalog of books, journals, and audiovisuals.
|
263
242
|
|
264
|
-
|
265
|
-
|
243
|
+
# LA - Language
|
244
|
+
# The language in which the article was published.
|
266
245
|
|
267
|
-
|
268
|
-
|
246
|
+
# LR - Last Revision Date
|
247
|
+
# The date a change was made to the record during a maintenance procedure.
|
269
248
|
|
270
|
-
|
271
|
-
|
272
|
-
|
249
|
+
# MHDA - MeSH Date
|
250
|
+
# The date MeSH terms were added to the citation. The MeSH date is the
|
251
|
+
# same as the Entrez date until MeSH are added.
|
273
252
|
|
274
|
-
|
275
|
-
|
253
|
+
# PHST - Publication History Status Date
|
254
|
+
# History status date.
|
276
255
|
|
277
|
-
|
278
|
-
|
256
|
+
# PS - Personal Name as Subject
|
257
|
+
# Individual is the subject of the article.
|
279
258
|
|
280
|
-
|
281
|
-
|
259
|
+
# PST - Publication Status
|
260
|
+
# Publication status.
|
282
261
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
262
|
+
# PT - Publication Type
#   The type of material the article represents.
#
# Returns the PT field as an Array with one element per line.
def pt
  kinds = @pubmed['PT'].strip
  kinds.split(/\n/)
end
alias :publication_type :pt
|
289
268
|
|
290
|
-
|
291
|
-
|
269
|
+
# RF - Number of References
|
270
|
+
# Number of bibliographic references for Review articles.
|
292
271
|
|
293
|
-
|
294
|
-
|
272
|
+
# RIN - Retraction In
|
273
|
+
# Retraction of the article
|
295
274
|
|
296
|
-
|
297
|
-
|
298
|
-
|
275
|
+
# RN - EC/RN Number
|
276
|
+
# Number assigned by the Enzyme Commission to designate a particular
|
277
|
+
# enzyme or by the Chemical Abstracts Service for Registry Numbers.
|
299
278
|
|
300
|
-
|
301
|
-
|
279
|
+
# ROF - Retraction Of
|
280
|
+
# Article being retracted.
|
302
281
|
|
303
|
-
|
304
|
-
|
282
|
+
# RPF - Republished From
|
283
|
+
# Original article.
|
305
284
|
|
306
|
-
|
307
|
-
|
285
|
+
# SB - Journal Subset
|
286
|
+
# Code for a specific set of journals.
|
308
287
|
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
288
|
+
# SI - Secondary Source Identifier
|
289
|
+
# Identifies a secondary source that supplies information, e.g., other
|
290
|
+
# data sources, databanks and accession numbers of molecular sequences
|
291
|
+
# discussed in articles.
|
313
292
|
|
314
|
-
|
315
|
-
|
293
|
+
# TT - Transliterated / Vernacular Title
|
294
|
+
# Non-Roman alphabet language titles are transliterated.
|
316
295
|
|
317
|
-
|
318
|
-
|
296
|
+
# UIN - Update In
|
297
|
+
# Update to the article.
|
319
298
|
|
320
|
-
|
321
|
-
|
299
|
+
# UOF - Update Of
|
300
|
+
# The article being updated.
|
322
301
|
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
302
|
+
# URLF - URL Full-Text
|
303
|
+
# Link to the full-text of article at provider's website. Links are
|
304
|
+
# incomplete. Use PmLink for the complete set of available links.
|
305
|
+
# [PmLink] http://www.ncbi.nlm.nih.gov/entrez/utils/pmlink_help.html
|
327
306
|
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
307
|
+
# URLS - URL Summary
|
308
|
+
# Link to the article summary at provider's website. Links are
|
309
|
+
# incomplete. Use PmLink for the complete set of available links.
|
310
|
+
# [PmLink] http://www.ncbi.nlm.nih.gov/entrez/utils/pmlink_help.html
|
332
311
|
|
333
|
-
|
312
|
+
end # MEDLINE
|
334
313
|
|
335
|
-
end
|
314
|
+
end # Bio
|
336
315
|
|
337
316
|
|
338
317
|
|