bio-fasta_retrieve 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/bio-fasta_retrieve.rb +47 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5769b466a33fbea95e39d68e59bdcb05b3aa4909
4
+ data.tar.gz: f2cb672aa9e4cb7abde0dd7a57ac2423bf07a3e0
5
+ SHA512:
6
+ metadata.gz: 9e01d0a422faf4ea23bedbe989e2db0c8e2fbe974106a49e4aa99ed294b430ed5c032c8964dbba5b02c894daf31bbf8047a22bef6d604315c859374ec3199b69
7
+ data.tar.gz: 4074fb0903cb3efbbd9baa6bc06cd7c3f05a6f34216b0fabefbfe11d0db5444787d7283cc71d2d98699d3698ac91f06ae6393a6cc8892b08af6194cd6462f3ea
@@ -0,0 +1,47 @@
1
+ # Copyright:: Copyright (C) 2015
2
+ # Andrei Rozanski <rozanski.andrei@gmail.com>
3
+ # License:: The Ruby License
4
+ # == Description
5
+ # This file contains a DNA sequence retriever for UCSC that outputs FASTA text built from a Bio::Sequence object
6
+
7
+ #Define module FastaRetriever
8
module FastaRetriever
  # Fetches a DNA segment from the UCSC DAS endpoint and renders it as FASTA.
  # All functionality is exposed through class methods; instances carry no state.
  class Retriever
    require 'bio'      # Bio::Sequence::NA, used to build the FASTA record.
    require 'nokogiri' # Parses the XML document returned by the DAS endpoint.
    require 'open-uri' # Adds #open to HTTP URIs so the document can be fetched.

    def initialize; end

    # Retrieves a segment, prints it to standard output as FASTA and returns
    # the same FASTA text so callers can use it programmatically.
    #
    # @param organism [String] assembly name placed in the URL path (e.g. "hg19")
    # @param chr [String] chromosome name (e.g. "chr10")
    # @param start_coord [Integer, String] segment start (e.g. 1000)
    # @param end_coord [Integer, String] segment end (e.g. 2000)
    # @return [String] the FASTA text that was printed
    def self.retrieve(organism, chr, start_coord, end_coord)
      address = "http://genome.ucsc.edu/cgi-bin/das/#{organism}/dna?segment=#{chr}:#{start_coord},#{end_coord}"
      sequence = ucsc_connect(address)
      fasta = compose_fasta(sequence, organism, chr, start_coord, end_coord)
      puts fasta
      fasta
    end # End retrieve

    # Downloads the XML document at +address+ and extracts the DNA sequence.
    #
    # @param address [String] URL of the DAS "dna" request
    # @return [String] the sequence with newlines removed
    def self.ucsc_connect(address)
      # Kernel#open no longer accepts URLs on Ruby 3.0+ and would run a
      # "|command" string as a subprocess; URI#open only ever opens the URI.
      # The block form closes the response handle once parsing is finished.
      URI.parse(address).open do |response|
        parse_xml(Nokogiri::XML(response))
      end
    end # End ucsc_connect

    # Extracts the text of every DNA element in the document.
    #
    # @param xml [Nokogiri::XML::Document] parsed DAS response
    # @return [String] concatenated DNA text with newline characters stripped
    def self.parse_xml(xml)
      xml.xpath('//DNA').text.delete("\n")
    end # End parse_xml

    # Builds the FASTA text for +sequence+, using the header
    # "<organism>_<chr>_<start_coord>_<end_coord>".
    #
    # @param sequence [String] nucleotide sequence
    # @return [String] FASTA-formatted record
    def self.compose_fasta(sequence, organism, chr, start_coord, end_coord)
      header = "#{organism}_#{chr}_#{start_coord}_#{end_coord}"
      Bio::Sequence::NA.new(sequence).to_fasta(header)
    end # End compose_fasta
  end # End Class Retriever
end # End Module FastaRetriever
46
+
47
+
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-fasta_retrieve
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrei Rozanski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-08-13 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Gem that aims to retrieve DNA sequence from UCSC
14
+ email: rozanski.andrei@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/bio-fasta_retrieve.rb
20
+ homepage: http://rubygems.org/gems/bio-fasta_retrieve.rb
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.2.3
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Using DAS from UCSC, retrieve DNA sequences from given coordinates.
44
+ test_files: []