RubyGems - bivy - Versions diffs - 0.0.5 - Mend

bivy 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

data/.gitignore +21 -0
data/History +16 -0
data/LICENSE +29 -0
data/README.rdoc +37 -0
data/Rakefile +43 -0
data/TODO +12 -0
data/VERSION +1 -0
data/bin/bivy.rb +56 -0
data/bin/pubmed_to_bivy.rb +78 -0
data/doc/config.yaml +13 -0
data/doc/src/default.css +126 -0
data/doc/src/default.template +38 -0
data/doc/src/tutorial/basic_flow.page +71 -0
data/doc/src/tutorial/index.page +8 -0
data/doc/src/tutorial/new_formats_and_media.page +83 -0
data/jrn_abbrev/abbr_html.tgz +0 -0
data/jrn_abbrev/abbr_to_journal.yaml +676 -0
data/jrn_abbrev/download_abbrevs.rb +20 -0
data/jrn_abbrev/for_ruby_class.rb +686 -0
data/jrn_abbrev/html_to_yaml.rb +50 -0
data/lib/bibliography.rb +144 -0
data/lib/bivy.rb +4 -0
data/lib/citation.rb +194 -0
data/lib/format.rb +120 -0
data/lib/format/acs.rb +88 -0
data/lib/format/bioinformatics.rb +33 -0
data/lib/format/bmc.rb +38 -0
data/lib/format/jtp.rb +30 -0
data/lib/format/mla.rb +50 -0
data/lib/formatter.rb +276 -0
data/lib/journal.rb +6 -0
data/lib/journal/iso_to_full.yaml +1320 -0
data/lib/journal/medline_to_full.yaml +7 -0
data/lib/journal/medline_to_iso.yaml +45 -0
data/lib/media.rb +88 -0
data/lib/media/html.rb +65 -0
data/lib/ooffice.rb +39 -0
data/lib/pubmed.rb +209 -0
data/lib/rtf.rb +217 -0
data/old_stuff/old_list2refs.rb +103 -0
data/old_stuff/pubmed2html.rb +119 -0
data/old_stuff/pubmed_bib_write.rb +92 -0
data/old_stuff/xml.tmp.xml +115 -0
data/scripts/merge_bibs.rb +70 -0
data/spec/bibliography_spec.rb +127 -0
data/spec/citation_positions.odt +0 -0
data/spec/formatter_spec.rb +14 -0
data/spec/formatter_spec/cits_after.xml +2 -0
data/spec/formatter_spec/cits_before.xml +2 -0
data/spec/formatter_spec/content.xml +2 -0
data/spec/ooffice_spec.rb +27 -0
data/spec/pubmed_spec.rb +26 -0
data/spec/spec_helper.rb +7 -0
data/spec/testfiles/doc1.odt +0 -0
metadata +136 -0

data/lib/journal/medline_to_full.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+Anal Chem: Analytical Chemistry
+Mol Cell Proteomics: Molecular and Cellular Proteomics
+Cell Signal: Cellular Signalling
+J Proteome Res: Journal of Proteome Research
+Nat Chem Biol: Nature Chemical Biology
+Nat Biotechnol: Nature Biotechnology

data/lib/journal/medline_to_iso.yaml ADDED Viewed

@@ -0,0 +1,45 @@
+---
+Nature: Nature
+Biomed Mass Spectrom: Biomed. Mass Spectrom.
+Nat Biotechnol: Nat. Biotechnol.
+Anal. Chem.: Anal. Chem.
+Nat Chem Biol: Nat. Chem. Biol.
+J. Proteome Res.: J. Proteome Res.
+Rapid Commun Mass Spectrom: Rapid Commun. Mass Spectrom.
+KDD Workshop on Mining Temporal and Sequential Data: KDD Workshop MTSD
+J Chromatogr B Analyt Technol Biomed Life Sci: J. Chromatogr., B
+J. Chromatogr., A: J. Chromatogr., A
+J Chemom: J. Chemom.
+Genome Res: Genome Res.
+Biomed. Mass Spectrom.: Biomed. Mass Spectrom.
+SIAM J. Num. Anal.: SIAM J. Num. Anal.
+Proteomics: Proteomics
+J. Chemom.: J. Chemom.
+Eur. Food Res. Technol.: Eur. Food Res. Technol.
+Environ Sci Technol: Environ. Sci. Technol.
+Analytica Chimica Acta: Anal. Chim. Acta
+Nature Methods: Nature Methods
+J Chromatogr A: J. Chromatogr., A
+IEEE ASSP: IEEE ASSP
+Nucleic Acids Res: Nucleic Acids Res.
+Mol Cell Proteomics: Mol. Cell. Proteomics
+J Proteome Res: J. Proteome Res.
+J Mol Biol: J. Mol. Biol.
+J. Am. Soc. Mass Spectrom.: J. Am. Soc. Mass Spectrom.
+J. Mol. Biol.: J. Mol. Biol.
+Bioinformatics: Bioinformatics
+Anal Chem: Anal. Chem.
+Mol Cell Proteomics: Mol. Cell. Proteomics
+Cell Signal: Cell. Signal.
+Curr Opin Chem Biol: Curr. Opin. Chem. Biol.
+Proc IEEE Comput Syst Bioinform Conf: Proc. IEEE Comput. Syst. Bioinform. Conf.
+Lab Chip: Lab Chip
+Clin J Oncol Nurs: Clin. J. Oncol. Nurs.
+Mass Spectrom Rev: Mass Spectrom. Rev.
+Nat Protoc: Nat. Protoc.
+Conf Proc IEEE Eng Med Biol Soc: Conf. Proc. IEEE Eng. Med. Biol. Soc.
+Curr Opin Oncol: Curr. Opin. Oncol.
+J Mass Spectrom: J. Mass Spectrom.
+Trends Pharmacol Sci: Trends Pharmacol. Sci.
+Assay Drug Dev Technol: Assay Drug Dev. Technol.
+Mol Pharmacol: Mol. Pharmacol.

data/lib/media.rb ADDED Viewed

@@ -0,0 +1,88 @@
# Mixin with the text-decoration primitives shared by output media, plus a
# factory (Media.new) that loads a concrete medium class on demand.
#
# note that you need to add the shortcut to module Format::MediaForwarding
# hash if you want to be able to access it!
module Media
  # Conversion from a medium's filename (as a symbol) to the properly
  # capitalized class name, e.g. :html => 'HTML'.  An entry is only needed
  # when the class name is not just the capitalized filename.
  Symbol_to_class_string = { }

  # Requires "media/<tp>" and returns a new instance of the matching class
  # looked up under Media.
  #
  # tp:: the medium's filename, as a Symbol or a String
  #
  # Raises LoadError if the file cannot be required and NameError if no
  # class of the resolved name exists under Media.
  #
  # NOTE(review): the default :jtp looks like a format name rather than a
  # medium -- confirm that a media/jtp file exists, otherwise calling this
  # without an argument raises LoadError.
  def self.new(tp=:jtp)
    require "media/#{tp}"
    # The registry is keyed by symbols; normalize so that a String such as
    # 'html' finds its entry instead of falling back to 'Html'.
    klass_st = Symbol_to_class_string[tp.to_sym] || tp.to_s.capitalize
    Media.const_get(klass_st).new
  end

  # Text emitted before the bibliography (none in the generic medium).
  def header
  end

  # Text emitted after the bibliography (none in the generic medium).
  def footer
  end

  # Sends +method+ to self with string.to_s as its argument.  Returns nil
  # without calling anything when +string+ is nil or false, so missing
  # citation fields do not produce empty markup.
  def call_it(method, string)
    return nil unless string
    send(method, string.to_s)
  end

  # Wraps a String in parentheses.
  def parenthesize(string)
    '(' + string + ')'
  end

  # Wraps a String in square brackets.
  def bracket(string)
    '[' + string + ']'
  end

  # nil-safe bracket
  def br(string)
    call_it(:bracket, string)
  end

  # nil-safe parenthesize
  def par(string)
    call_it(:parenthesize, string)
  end

  # nil-safe italicize
  def i(string)
    call_it(:italics, string)
  end

  # nil-safe bold
  def b(string)
    call_it(:bold, string)
  end

  # nil-safe underline
  def u(string)
    call_it(:underline, string)
  end

  # Wraps a String in an italic <span>.
  def italics(string)
    "<span style=\"font-style:italic;\">" + string + "</span>"
  end

  # Wraps a String in a bold <span>.
  def bold(string)
    "<span style=\"font-weight:bold;\">" + string + "</span>"
  end

  # Wraps a String in an underlined <span>.
  def underline(string)
    "<span style=\"text-decoration:underline;\">" + string + "</span>"
  end

  # create the final bibliography string in whatever media you desire
  # the example here is html
  #
  # format_object:: anything responding to format(citation) with a String
  # citations::     the citations to render, in output order
  #
  # Returns an <ol> list with one <li> per citation.
  def format(format_object, citations)
    cts = citations.map do |cit|
      "  <li>" + format_object.format(cit) + "</li>"
    end
    "<ol>\n" + cts.join("\n") + "\n</ol>\n"
  end
end

data/lib/media/html.rb ADDED Viewed

@@ -0,0 +1,65 @@
# HTML output medium.  Registers itself in Media::Symbol_to_class_string so
# that the :html symbol resolves to this (all-caps) class name.
class Media::HTML
  include Media
  Media::Symbol_to_class_string[:html] = 'HTML'

  # Opening markup of the HTML document.
  def header
    "<html><body>"
  end

  # Closing markup of the HTML document.
  def footer
    "</body></html>"
  end

  # Wraps a String in an italic <span>.
  def italics(string)
    "<span style=\"font-style:italic;\">" + string + "</span>"
  end

  # Wraps a String in a bold <span>.
  def bold(string)
    "<span style=\"font-weight:bold;\">" + string + "</span>"
  end

  # Wraps a String in an underlined <span>.
  def underline(string)
    "<span style=\"text-decoration:underline;\">" + string + "</span>"
  end

  # Renders already-formatted citation strings as an ordered list, one
  # tab-indented <li> per citation.
  def list(citations_as_strings)
    cts = citations_as_strings.map do |cit|
      "\t<li>#{cit}</li>"
    end
    "<ol>\n" + cts.join("\n") + "\n</ol>\n"
  end

  # Ensures the text ends with sentence punctuation ('.', '?' or '!').
  #
  # expects opening and closing tags.  Operates on last one.
  # trailing text (outside a tag) is operated on if existing
  # <tag>text</tag> => <tag>text.</tag>
  # <tag>text</tag>more_text => '...more_text.'
  # if the text already has a period, then no change
  #
  # array_or_string:: a String, or an Array of Strings (each is processed)
  #
  # Returns a String (or an Array of Strings).  The argument is never
  # modified; when no change is needed the same String may be returned.
  def periodize(array_or_string)
    if array_or_string.is_a?(Array)
      return array_or_string.map { |st| periodize(st) }
    end

    st = array_or_string
    if st.end_with?('>')
      # /m lets the greedy (.*) run across newlines so the LAST closing tag
      # of a multi-line string is the one that receives the period.
      st.sub(/(.*)(<\/.*?>)/m) do
        inner, closing_tag = $1, $2
        if inner.end_with?('.', '?', '!')
          inner + closing_tag
        else
          inner + '.' + closing_tag
        end
      end
    elsif st.end_with?('.', '?', '!')
      st
    else
      # build a new string rather than appending in place, so the caller's
      # (possibly frozen) string is left untouched
      st + '.'
    end
  end
end

data/lib/ooffice.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require 'fileutils'
# Short alias for FileUtils; kept so that code referring to FU keeps working.
FU = FileUtils

# Helper for rewriting the content.xml stored inside an OpenOffice document.
class OpenOffice
  # unzips the file, gives a string of the content xml and will replace it
  # whatever you pass back (preferably a string;)
  # requires write access to the directory where the file is located
  # The next time you open the file, it will act like you've corrupted the
  # file (the character count is probably off, etc) just let it clean it up
  # for you!
  #
  # filename::     path to the document to read
  # new_basename:: base name of the new file (preferably <name>.odt); it is
  #                written next to +filename+
  # blk::          receives the content.xml text; its return value is
  #                printed back into content.xml
  #
  # Relies on the external +unzip+ and +zip+ programs.  The scratch directory
  # "<basename>.unzip.tmp" is removed afterwards, even if the block raises.
  def modify_content(filename, new_basename, &blk)
    content_file = 'content.xml'
    basename = File.basename(filename)
    tmpdir = basename + ".unzip.tmp"
    Dir.chdir(File.dirname(filename)) do
      if File.exist?(tmpdir)
        warn "#{tmpdir} already exists!"
        warn "deleting contents of #{tmpdir}"
        FileUtils.rm_rf(tmpdir)
      end
      FileUtils.mkpath(tmpdir)
      begin
        FileUtils.cp(basename, tmpdir)
        Dir.chdir(tmpdir) do
          run_tool('unzip', '-q', basename)
          string = IO.read(content_file)
          replace_with = blk.call(string)
          File.open(content_file, 'w') { |fh| fh.print(replace_with) }
          # drop the copied archive so it does not end up inside the new one
          FileUtils.rm(basename, :force => true)
          to_include = Dir["*"]
          run_tool('zip', '-r', '-q', new_basename, *to_include)
          FileUtils.mv new_basename, '..'
        end
      ensure
        FileUtils.rm_rf tmpdir
      end
    end
  end

  private

  # Runs an external program with its arguments passed directly (no shell),
  # so file names containing spaces or quotes arrive unchanged.  A failing
  # or missing program is reported with a warning instead of being ignored.
  def run_tool(*command)
    return if system(*command)
    warn "command failed: #{command.join(' ')}"
  end
end

data/lib/pubmed.rb ADDED Viewed

@@ -0,0 +1,209 @@
+require 'open-uri'
+require 'rexml/document'
+require 'iconv'
+require 'citation'
+# given the html page where the display is specified as xml
+# extracts out the requested pieces
# An article citation whose fields are filled in from the XML that PubMed
# returns for a given pubmed id.
class PubMed < Citation::Article
  attr_accessor :pmid

  # pmid::       a pubmed id; the citation data is fetched online and parsed.
  #              Also takes pmid=hash of values to set: each key is assigned
  #              through its "<key>=" writer, except 'authors', whose entries
  #              may be Strings (parsed with Citation::Author.from_s) or
  #              author objects.
  # identifier:: identifier to use for the citation; when omitted and a
  #              pmid was given, one is generated with create_id.
  def initialize(pmid=nil, identifier=nil)
    @quotes = []
    @authors = []
    if pmid.is_a? Hash
      # TODO: THIS WHOLE BRANCH SHOULD BE ENCAPSULATED/INHERITED! but can't
      # get inheritance with authors= working for some reason
      pmid.each do |k,v|
        if k == 'authors'
          v.each do |auth|
            authors.push(auth.is_a?(String) ? Citation::Author.from_s(auth) : auth)
          end
        else
          send("#{k}=".to_sym, v)
        end
      end
    else
      @pmid = pmid
      @bibtype = :article
      extract_attrs_from_xml(get_xml(query_builder(pmid))) if pmid
      if identifier
        @ident = identifier
      elsif pmid
        @ident = create_id
      end
    end
  end

  # Returns the body found at +query+ (read through open-uri) as a String.
  # Errors raised while opening or reading (e.g. no internet connection)
  # are not rescued here.
  #
  # NOTE(review): opening a URL through Kernel#open depends on the Ruby
  # version in use -- confirm before running on a recent Ruby.
  def get_xml(query)
    # block form closes the handle even if the read raises
    open(query) { |handle| handle.read }
  end

  # first author's last name + year collapsing any spaces
  # (with no authors, just the year)
  def create_id
    first_author = @authors[0]
    last_name = first_author ? first_author.last.to_s : ''
    (last_name + @year.to_s).gsub(/\s+/, '')
  end

  # Debug representation: the scalar attributes followed by the authors.
  def inspect
    attrs = %w(ident quotes abstract journal_medline title year month vol issue pages bibtype)
    st = "<#{self.class}:##{self.__id__} "
    st << attrs.map { |v| ":#{v}=>#{send(v).inspect}" }.join(", ")
    st << " @authors=[#{authors.map { |g| g.inspect }.join(", ")}]"
    st << ">"
    st
  end

  private

  # Builds the query to ask for a citation given a pubmed id
  # valid types are xml, medline, (...need to figure out others)
  #
  # returns pubmed query based on pubmed id with xml as the output type.
  #   Older form: http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=xml&list_uids=14654843&query_hl=6
  #
  # http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=Pubmed&id=11283582&rettype=xml
  def query_builder(pmid)
    type = 'xml'
    #base_url = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?'
    base_url = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'
    cgi_params = ['db=Pubmed', "rettype=#{type}", 'retmode=text', "id=#{pmid}"].join('&')
    base_url + cgi_params
  end

  # get an xml element's text according to its path (assumes single element)
  #
  # Returns nil when the path matches nothing or the element has no text.
  # Raises RuntimeError when more than one element matches.
  def get_e_text(element, path)
    els = element.elements.to_a(path)
    raise "More than one #{path}!" if els.size > 1
    return nil if els.size == 0
    text_node = els[0].get_text
    text_node ? text_node.value : nil
  end

  # Returns one Citation::Author (last name, initials) per Author element
  # of the article's AuthorList.
  def get_author_list(xml)
    auths = xml.elements.to_a("//PubmedArticle/MedlineCitation/Article/AuthorList/Author")
    auths.collect do |auth|
      ## I think we are getting author names out in UTF-8 which is not being interpreted properly.
      ## Transform characters into something more standard, eh
      last_name = utf8_to_latin(get_e_text(auth, "LastName"), "**BADCHARS**")
      initials = utf8_to_latin(get_e_text(auth, "Initials"), "**BADINITS**")
      Citation::Author.new(last_name, initials)
    end
  end

  # Converts +text+ from utf-8 to iso-8859-15; returns +placeholder+ when
  # the text contains a sequence Iconv reports as illegal.
  def utf8_to_latin(text, placeholder)
    Iconv.new('iso-8859-15', 'utf-8').iconv(text)
  rescue Iconv::IllegalSequence
    placeholder
  end

  # if they are not set from the xml, tries to set from hashes or raises a
  # RuntimeError
  #
  # Fills @journal_iso from Journal::Medline_to_ISO and @journal_full from
  # Journal::Medline_to_Full, both keyed by +journal_medline+.  The error
  # message names every missing key and the yaml file to alter.
  def set_journals_or_die(journal_medline)
    error_messages = []
    unless @journal_iso
      if Journal::Medline_to_ISO.key?(journal_medline)
        @journal_iso = Journal::Medline_to_ISO[journal_medline]
      else
        error_messages << "Expect key for '#{journal_medline}' in Journal::Medline_to_ISO"
        error_messages << "(alter file journal/medline_to_iso.yaml)"
      end
    end
    unless @journal_full
      if Journal::Medline_to_Full.key?(journal_medline)
        @journal_full = Journal::Medline_to_Full[journal_medline]
      else
        error_messages << "Expect key for '#{journal_medline}' in Journal::Medline_to_Full"
        error_messages << "(alter file journal/medline_to_full.yaml)"
      end
    end
    return if error_messages.empty?

    label = "******************************************************************"
    # framed with blank lines and a banner so the message stands out
    lines = ['', label] + error_messages + ["Aborting!", label, '']
    raise(lines.join("\n"))
  end

  # Parses the PubMed xml and sets title, journal names, authors, volume,
  # issue, year, month, pages and abstract from it.  Pages default to
  # '[Epub]' and the abstract to '' when absent.
  def extract_attrs_from_xml(xml_string)
    xml = REXML::Document.new xml_string
    art = "//PubmedArticle/MedlineCitation/Article/"
    @title = get_e_text(xml, art + "ArticleTitle")
    @journal_medline = get_e_text(xml, "//PubmedArticle/MedlineCitation/MedlineJournalInfo/MedlineTA")
    @journal_full = get_e_text(xml, art + 'Journal/Title')
    @journal_iso = get_e_text(xml, art + 'Journal/ISOAbbreviation')
    set_journals_or_die(@journal_medline)
    @authors = get_author_list(xml)
    iss = art + "Journal/JournalIssue/"
    pdate = iss + "PubDate/"
    @vol = get_e_text(xml, iss + "Volume")
    @issue = get_e_text(xml, iss + "Issue")
    @year = get_e_text(xml, pdate + "Year")
    @month = get_e_text(xml, pdate + "Month")
    @pages = get_e_text(xml, art + "Pagination/MedlinePgn") || '[Epub]'
    @abstract = get_e_text(xml, art + "Abstract/AbstractText") || ''
  end

  # unnecessary now..
  # Pulls the xml out of an html page that embeds it inside <dd><pre>...:
  # strips font and b* tags and unescapes &lt; &gt; &quot;.  Returns ""
  # when the page has no such section.
  def pubmed_extract_xml_from_html(string)
    html = ""
    if string =~ /<dd><pre>(.*)<\/pre><\/dd>/m
      html = $1
      html.gsub!(/<\/?font.*?>/, '')
      html.gsub!(/<\/?b.*?>/, '')
      html.gsub!(/\&lt;/, '<')
      html.gsub!(/\&gt;/, '>')
      html.gsub!(/\&quot;/, '"')
    end
    html
  end
end