bio-fasta_retrieve 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/bio-fasta_retrieve.rb +47 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5769b466a33fbea95e39d68e59bdcb05b3aa4909
|
4
|
+
data.tar.gz: f2cb672aa9e4cb7abde0dd7a57ac2423bf07a3e0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9e01d0a422faf4ea23bedbe989e2db0c8e2fbe974106a49e4aa99ed294b430ed5c032c8964dbba5b02c894daf31bbf8047a22bef6d604315c859374ec3199b69
|
7
|
+
data.tar.gz: 4074fb0903cb3efbbd9baa6bc06cd7c3f05a6f34216b0fabefbfe11d0db5444787d7283cc71d2d98699d3698ac91f06ae6393a6cc8892b08af6194cd6462f3ea
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Copyright:: Copyright (C) 2015
|
2
|
+
# Andrei Rozanski <rozanski.andrei@gmail.com>
|
3
|
+
# License:: The Ruby License
|
4
|
+
# == Description
|
5
|
+
# This file contains a DNA sequence retriever for UCSC that builds a Bio::Sequence::NA object and outputs it in FASTA format
|
6
|
+
|
7
|
+
#Define module FastaRetriever
|
8
|
+
module FastaRetriever
|
9
|
+
class Retriever
|
10
|
+
|
11
|
+
require 'bio' # For creation of fasta object.
|
12
|
+
require 'nokogiri' # For sequence retrieve and parse.
|
13
|
+
require 'open-uri' # For sequence retrieve.
|
14
|
+
|
15
|
+
# Instances carry no state; the retrieval methods in this class are all
# defined on the class itself (def self.*).
def initialize;end
|
16
|
+
|
17
|
+
#input: organism, chromosome, start and end coordinates, passed as four separate arguments (e.g. "hg19", "chr10", 1000, 2000)
|
18
|
+
# Fetches the DNA for a genomic interval from the UCSC DAS server, prints it
# as a FASTA record and returns that record.
#
# organism    - genome assembly name used in the DAS path (e.g. "hg19")
# chr         - chromosome / segment name (e.g. "chr10")
# start_coord - start of the requested interval
# end_coord   - end of the requested interval
#
# Returns whatever compose_fasta produced (the FASTA text). Previously the
# method returned nil (the result of puts), so callers could only obtain
# the sequence by reading standard output.
def self.retrieve(organism, chr, start_coord, end_coord)
  address = "http://genome.ucsc.edu/cgi-bin/das/#{organism}/dna?segment=#{chr}:#{start_coord},#{end_coord}"
  # ucsc_connect hands back the sequence text, not the raw XML document.
  sequence = ucsc_connect(address)
  fasta = compose_fasta(sequence, organism, chr, start_coord, end_coord)
  puts fasta
  fasta
end # End retrieve
|
24
|
+
|
25
|
+
#connects to UCSC, retrieves the XML response and returns the DNA sequence parsed from it
|
26
|
+
# Downloads the DAS response at +address+ and returns the DNA sequence text
# that parse_xml extracts from it.
#
# The URL is opened through URI#open (mixed in by open-uri) instead of
# Kernel#open: opening URLs via Kernel#open was deprecated in Ruby 2.7 and
# removed in Ruby 3.0, and Kernel#open would run a shell command if the
# string started with "|". The block form closes the response stream as
# soon as Nokogiri has consumed it.
#
# address - URL of the DAS request; must be a scheme open-uri can open
#           (http, https or ftp).
def self.ucsc_connect(address)
  document = URI.parse(address).open { |response| Nokogiri::XML(response) }
  parse_xml(document)
end # End ucsc_connect
|
31
|
+
|
32
|
+
#parse sequence from xml
|
33
|
+
# Collects the text of the nodes matched by the "//DNA" XPath query and
# strips every newline so the sequence comes back as a single line.
def self.parse_xml(xml)
  dna_nodes = xml.xpath("//DNA")
  dna_nodes.text.delete("\n")
end # End parse_xml
|
37
|
+
|
38
|
+
# Wraps the raw sequence in a Bio::Sequence::NA and renders it through
# to_fasta, using "<organism>_<chr>_<start_coord>_<end_coord>" as the
# record header.
def self.compose_fasta(sequence, organism, chr, start_coord, end_coord)
  Bio::Sequence::NA
    .new(sequence)
    .to_fasta("#{organism}_#{chr}_#{start_coord}_#{end_coord}")
end # End compose_fasta
|
43
|
+
|
44
|
+
end # End Class Retriever
|
45
|
+
end # End Module FastaRetriever
|
46
|
+
|
47
|
+
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-fasta_retrieve
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrei Rozanski
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-13 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Gem that aims to retrieve DNA sequence from UCSC
|
14
|
+
email: rozanski.andrei@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/bio-fasta_retrieve.rb
|
20
|
+
homepage: http://rubygems.org/gems/bio-fasta_retrieve.rb
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.2.3
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: Using DAS from UCSC, retrieve DNA sequences from given coordinates.
|
44
|
+
test_files: []
|