bio-fasta_retrieve 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/bio-fasta_retrieve.rb +47 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5769b466a33fbea95e39d68e59bdcb05b3aa4909
4
+ data.tar.gz: f2cb672aa9e4cb7abde0dd7a57ac2423bf07a3e0
5
+ SHA512:
6
+ metadata.gz: 9e01d0a422faf4ea23bedbe989e2db0c8e2fbe974106a49e4aa99ed294b430ed5c032c8964dbba5b02c894daf31bbf8047a22bef6d604315c859374ec3199b69
7
+ data.tar.gz: 4074fb0903cb3efbbd9baa6bc06cd7c3f05a6f34216b0fabefbfe11d0db5444787d7283cc71d2d98699d3698ac91f06ae6393a6cc8892b08af6194cd6462f3ea
@@ -0,0 +1,47 @@
1
+ # Copyright:: Copyright (C) 2015
2
+ # Andrei Rozanski <rozanski.andrei@gmail.com>
3
+ # License:: The Ruby License
4
+ # == Description
5
+ # This file contains a DNA sequence retriever for UCSC that outputs the sequence as FASTA-formatted text
6
+
7
require 'bio'      # Bio::Sequence::NA, used to build the FASTA record.
require 'nokogiri' # Parses the XML document returned by the DAS server.
require 'open-uri' # Lets HTTP(S)/FTP URI objects be opened like files.

# Namespace for retrieving DNA sequences from the UCSC DAS server.
module FastaRetriever
  # Stateless retriever; every operation is exposed as a class method.
  class Retriever
    # Fetches the DNA for a genomic segment, prints it as FASTA and returns it.
    #
    # The four values are passed as separate arguments, e.g.
    #   FastaRetriever::Retriever.retrieve('hg19', 'chr10', 1000, 2000)
    #
    # @param organism [String] genome assembly name used in the DAS path (e.g. "hg19")
    # @param chr [String] chromosome name (e.g. "chr10")
    # @param start_coord [Integer, String] first coordinate of the segment
    # @param end_coord [Integer, String] last coordinate of the segment
    # @return [String] the FASTA text that was printed
    def self.retrieve(organism, chr, start_coord, end_coord)
      address = "http://genome.ucsc.edu/cgi-bin/das/#{organism}/dna?segment=#{chr}:#{start_coord},#{end_coord}"
      sequence = ucsc_connect(address)
      fasta = compose_fasta(sequence, organism, chr, start_coord, end_coord)
      puts fasta
      # Return the record as well as printing it, so callers can use the
      # result programmatically instead of receiving nil from +puts+.
      fasta
    end

    # Downloads the DAS XML document at +address+ and extracts its DNA text.
    #
    # The URI object's own #open (provided by open-uri) is used instead of
    # Kernel#open: Kernel#open stopped handling URLs in Ruby 3.0 and would
    # also accept local paths and "|command" strings. The block form closes
    # the connection/tempfile as soon as the document has been parsed.
    #
    # @param address [String] HTTP(S) or FTP URL of the DAS DNA request
    # @return [String] sequence text with newlines removed
    def self.ucsc_connect(address)
      document = URI.parse(address).open { |io| Nokogiri::XML(io) }
      parse_xml(document)
    end

    # Extracts the text of every <DNA> element and strips the line breaks.
    #
    # @param xml [Nokogiri::XML::Document] parsed DAS response
    # @return [String] concatenated sequence text without newlines
    def self.parse_xml(xml)
      xml.xpath('//DNA').text.delete("\n")
    end

    # Wraps a raw nucleotide string in a FASTA record whose header is
    # "<organism>_<chr>_<start_coord>_<end_coord>".
    #
    # @param sequence [String] nucleotide sequence
    # @return [String] FASTA text as produced by Bio::Sequence::NA#to_fasta
    def self.compose_fasta(sequence, organism, chr, start_coord, end_coord)
      header = "#{organism}_#{chr}_#{start_coord}_#{end_coord}"
      Bio::Sequence::NA.new(sequence).to_fasta(header)
    end
  end
end
46
+
47
+
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-fasta_retrieve
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrei Rozanski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-08-13 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Gem that aims to retrieve DNA sequence from UCSC
14
+ email: rozanski.andrei@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/bio-fasta_retrieve.rb
20
+ homepage: http://rubygems.org/gems/bio-fasta_retrieve.rb
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.2.3
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Using DAS from UCSC, retrieve DNA sequences from given coordinates.
44
+ test_files: []