bio-ucsc-util 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
#(c) Copyright 2012 Nicholas A Thrower. All Rights Reserved.
# create Rakefile for shared library compilation
#
# Reads the bundled source version from the "Version" file and writes a
# Rakefile into this script's directory. The generated Rakefile unpacks the
# source archive, builds the libucsc libraries with make and copies them into
# the library source directory.

path = File.expand_path(File.dirname(__FILE__))

# Directory holding the Version file; also the destination of the built
# libraries. The trailing slash matters: the generated :clobber task appends
# file names directly to this string.
path_external = File.join(path, "../../lib/bio/ucsc/src/")

# Version of the bundled source archive. Surrounding whitespace (such as a
# trailing newline) is stripped so the value can be embedded in file names.
Version = File.read(File.join(path_external, "Version")).strip

# Name of the source archive unpacked by the generated :prepare task.
Source = "ucsc-util-src-#{Version}.tgz"

# NOTE: the interpolations inside the heredoc are evaluated now, while the
# Rakefile is written, so the generated file contains literal paths/versions.
File.open(File.join(path, "Rakefile"), "w") do |rakefile|
  rakefile.write <<-RAKE
require 'rbconfig'
require 'fileutils'
include FileUtils::Verbose
require 'rake/clean'

task :prepare do
  sh "tar xzvf #{Source}"
  cp("makefile","ucsc-util-src-#{Version}/lib")
end

task :compile do
  # build libraries
  cd("ucsc-util-src-#{Version}/lib") do
    sh "make"
    # RbConfig is used because the legacy Config constant was removed in Ruby 2.2
    case RbConfig::CONFIG['host_os']
    when /linux/
      sh "make libucsc.so.1"
      cp("libucsc.so.1","#{path_external}")
    when /darwin/
      sh "make libucsc.1.dylib"
      cp("libucsc.1.dylib","#{path_external}")
    else raise NotImplementedError, "ucsc-util not supported on your platform"
    end
    cp("libucsc.a","#{path_external}")
  end
end

task :clobber do
  rm_f("#{path_external}libucsc.a")
  rm_f("#{path_external}libucsc.so.1")
  rm_f("#{path_external}libucsc.1.dylib")
end

task :clean do
  rm_rf("ucsc-util-src-#{Version}")
end

desc "clean compile"
task :make_clean do
  cd("ucsc-util-src-#{Version}") do
    sh "make clean"
  end
end

task :default => [:prepare,:compile,:clean]

  RAKE
end
@@ -0,0 +1,3 @@
1
+ require 'ffi'
2
+ require 'bio/ucsc/util'
3
+ require 'bio/ucsc/big_wig'
@@ -0,0 +1,216 @@
# == big_wig.rb
# This file contains the BigWig class
#
# == Contact
#
# Author::    Nicholas A. Thrower
# Copyright:: Copyright (c) 2012 Nicholas A Thrower
# License::   See LICENSE.txt for more details
#

# :nodoc:
module Bio
  module Ucsc
    # The BigWig class interacts with bigWig files through the FFI
    # declarations in Bio::Ucsc::Binding.
    class BigWig
      require 'bio/ucsc/binding'
      # bigWig file name
      attr_accessor :filename
      # pointer to bbiFile (nil until #open has been called, and after #close)
      attr_accessor :bbi_file

      # convenience method to create a new BigWig and open it.
      # Arguments are passed straight to BigWig.new; returns the opened BigWig.
      def self.open(*args)
        new(*args).open
      end

      # Returns a new BigWig.
      # - f    => bigWig file name
      # - opts => currently unused
      def initialize(f=nil, opts={})
        @filename = f
      end

      # opens the file and returns self.
      # Raises ArgumentError when no filename is set, NameError when the file
      # does not exist and LoadError when the library rejects its format.
      def open
        raise ArgumentError, "filename undefined" unless filename
        raise NameError, "#{filename} not found" unless File.exist?(filename)
        raise LoadError, "#{filename} bad format" unless Binding::isBigWig(filename)
        @bbi_file = Binding::bigWigFileOpen(filename)
        self
      end

      # closes the file and forgets the bbiFile pointer
      def close
        # NOTE(review): upstream UCSC sources declare bbiFileClose as taking a
        # struct bbiFile ** - confirm that the bundled library accepts the
        # plain bbiFile pointer that is passed here.
        Binding::bbiFileClose(bbi_file) if bbi_file
        @bbi_file = nil
      end

      # returns the calculated standard deviation, for one chromosome when
      # chrom is given, otherwise from the file's total summary
      def std_dev(chrom=nil, opts={})
        return chrom_summary(chrom, 'std', opts) if chrom
        bbi_sum = total_summary(opts)
        Binding::calcStdFromSums(bbi_sum[:sumData], bbi_sum[:sumSquares], bbi_sum[:validCount])
      end

      # Percent of bases in region containing actual data
      def coverage(chrom=nil, opts={})
        return chrom_summary(chrom, 'coverage', opts) if chrom
        total_summary(opts)[:validCount] / chrom_length.to_f
      end

      # Returns the minimum value of items
      def min(chrom=nil, opts={})
        return chrom_summary(chrom, 'min', opts) if chrom
        total_summary(opts)[:minVal]
      end

      # Returns the maximum value of items
      def max(chrom=nil, opts={})
        return chrom_summary(chrom, 'max', opts) if chrom
        total_summary(opts)[:maxVal]
      end

      # Returns the mean value of items
      def mean(chrom=nil, opts={})
        return chrom_summary(chrom, 'mean', opts) if chrom
        bbi_sum = total_summary(opts)
        bbi_sum[:sumData] / bbi_sum[:validCount].to_f
      end

      # Total bases containing actual data
      def bases_covered(opts={})
        total_summary(opts)[:validCount]
      end

      # Returns size of given chromosome or the sum of all chromosomes
      def chrom_length(chrom=nil)
        return Binding::bbiChromSize(bbi_file, chrom) unless chrom.nil?
        chrom_list.inject(0) { |total, info| total + info[:size] }
      end

      # prints details about the file:
      # - minMax/m => Only output the minimum and maximum values
      # - zooms/z => Display zoom level details
      # - chroms/c => Display chrom details
      # - udcDir/u => /dir/to/cache - place to put cache for remote bigBed/bigWigs
      # Always returns nil; the given opts hash is not modified.
      def info(opts={})
        min_max = opts[:m] || opts[:minMax]
        zooms   = opts[:z] || opts[:zooms]
        chroms  = opts[:c] || opts[:chroms]
        bwf, bbi_sum = prepare_bwf(opts)
        # print min/max
        if min_max
          printf "%f %f\n", bbi_sum[:minVal], bbi_sum[:maxVal]
          return
        end
        # start summary
        printf "version: %d\n", bwf[:version]
        printf "isCompressed: %s\n", (bwf[:uncompressBufSize] > 0 ? "yes" : "no")
        printf "isSwapped: %i\n", bwf[:isSwapped] ? 1 : 0
        printf "primaryDataSize: %i\n", bwf[:unzoomedIndexOffset] - bwf[:unzoomedDataOffset]
        unless bwf[:levelList].null?
          first_level = Binding::BbiZoomLevel.new(bwf[:levelList])
          printf "primaryIndexSize: %i\n", first_level[:dataOffset] - bwf[:unzoomedIndexOffset]
        end
        # print zoom level details
        printf "zoomLevels: %d\n", bwf[:zoomLevels]
        if zooms
          zoom = Binding::BbiZoomLevel.new(bwf[:levelList])
          until zoom.null?
            printf "\t%d\t%d\n", zoom[:reductionLevel], zoom[:indexOffset] - zoom[:dataOffset]
            zoom = zoom[:next]
          end
        end
        # print chrom details (the list is fetched once and reused)
        chrom_infos = chrom_list
        printf "chromCount: %d\n", chrom_infos.size
        if chroms
          chrom_infos.each do |chrom|
            printf "\t%s %d %d\n", chrom[:name], chrom[:id], chrom[:size]
          end
        end
        # finish summary
        printf "basesCovered: %i\n", bbi_sum[:validCount]
        printf "mean: %f\n", bbi_sum[:sumData]/bbi_sum[:validCount]
        printf "min: %f\n", bbi_sum[:minVal]
        printf "max: %f\n", bbi_sum[:maxVal]
        printf "std: %f\n", Binding::calcStdFromSums(bbi_sum[:sumData], bbi_sum[:sumSquares], bbi_sum[:validCount])
        return
      end

      # retrieves summary information from the bigWig for the given range.
      # - chrom => Sequence name for summary
      # - start => Start of range (0 based)
      # - stop => End of range
      # - count => Number of datapoints to compute (1 for simple summary)
      # hash Options:
      # * :udcDir/:u - /dir/to/cache - place to put cache for remote bigBed/bigWigs
      # * :type/:t => Summary type string
      # * * mean - average value in region (default)
      # * * min - minimum value in region
      # * * max - maximum value in region
      # * * std - standard deviation in region
      # * * coverage - %% of region that is covered
      # Returns an Array of count Floats. Every slot starts out as NaN, so any
      # slot the library does not write stays NaN.
      def summary(chrom, start, stop, count, opts={})
        type = opts[:type] || opts[:t] || 'mean'
        configure_udc(opts)
        # allocate the array
        values = FFI::MemoryPointer.new(:double, count)
        # initialize to real NaN doubles (not the characters "NaN")
        values.write_array_of_double([Float::NAN] * count)
        # fill in with Summary Data
        Binding::bigWigSummaryArray(bbi_file, chrom, start, stop, Binding::bbiSummaryTypeFromString(type), count, values)
        values.read_array_of_double(count)
      end

      # creates a new smoothed bigWig file at the supplied location. Smoothing options:
      # - :chrom => restrict smoothing to a given chromosome
      # - :cutoff => probe count cutoff [mean of the whole file]
      # - :window => rolling window size [250]
      # - :type => smoothing algorithm [avg]
      # * * 'avg' - average depth in window
      # * * 'probe' - count of regions (probes) crossing 'cutoff' in window
      # - :v => verbosity passed to the library [0]
      # Big Wig options:
      # - :blockSize/:block_size => Number of items to bundle in r-tree [256]
      # - :itemsPerSlot/:items_per_slot => Number of data points bundled at lowest level [1024]
      # - :unc => If set, do not use compression
      # - :udcDir/:u => /dir/to/cache - place to put cache for remote bigBed/bigWigs
      def smooth(out_file, opts={})
        # apply the cache directory first so it is in effect for everything below
        configure_udc(opts)
        verb           = opts[:v] || 0
        window         = opts[:window] || 250
        cutoff         = opts[:cutoff] || mean
        block_size     = opts[:block_size] || opts[:blockSize] || 256
        items_per_slot = opts[:items_per_slot] || opts[:itemsPerSlot] || 1024
        do_compress    = !opts[:unc]
        type           = opts[:type] || 'avg'
        Binding::bigWigFileSmooth(filename, opts[:chrom], block_size, items_per_slot, do_compress, window, verb, out_file, type, cutoff)
      end

      private

      # sets the udc cache directory from opts (:u or :udcDir), falling back to
      # the library's current default; the opts hash is left untouched
      def configure_udc(opts)
        Binding::udcSetDefaultDir(opts[:u] || opts[:udcDir] || Binding::udcDefaultDir())
      end

      # configures the temporary directory in case of remote files and returns a new BbiFile Struct and BbiSummaryElement
      def prepare_bwf(opts)
        configure_udc(opts)
        return Binding::BbiFile.new(bbi_file), Binding::bbiTotalSummary(bbi_file)
      end

      # configures the udc directory and returns only the total BbiSummaryElement
      def total_summary(opts)
        configure_udc(opts)
        Binding::bbiTotalSummary(bbi_file)
      end

      # single-datapoint summary of the given type over a whole chromosome;
      # caller options (e.g. :udcDir) are forwarded, the type is forced
      def chrom_summary(chrom, type, opts)
        summary(chrom, 0, chrom_length(chrom), 1, (opts || {}).merge(:type => type)).first
      end

      # returns an array of BbiChromInfo items in file
      def chrom_list
        infos = []
        node = Binding::BbiChromInfo.new(Binding::bbiChromList(bbi_file))
        until node.null?
          infos << node
          node = node[:next]
        end
        infos
      end
    end
  end
end
@@ -0,0 +1,128 @@
# == binding.rb
# This file contains the ffi binding declarations for the ucsc api
# See https://github.com/ffi/ffi
#
# == Contact
#
# Author::    Nicholas A. Thrower
# Copyright:: Copyright (c) 2012 Nicholas A Thrower
# License::   See LICENSE.txt for more details
#

# :nodoc:
module Bio
  # -
  module Ucsc
    # Ruby binding for the ucsc utils: struct layouts, one enum and the
    # attached native functions of the library named by Library.filename.
    module Binding # :nodoc: all
      require 'bio/ucsc/library'
      extend FFI::Library
      ffi_lib Bio::Ucsc::Library.filename

      # CLASSES

      # A zoom level in bigWig file
      class BbiZoomLevel < FFI::Struct
        layout :next_ptr,       :pointer,    # Next in list
               :reductionLevel, :uint,       # How many bases per item
               :reserved,       :uint,       # Zero for Now
               :dataOffset,     :ulong_long, # Offset of data for this level in file
               :indexOffset,    :ulong_long  # Offset of index for this level in file

        # Field reader; the pseudo field :next wraps the raw :next_ptr pointer
        # in another BbiZoomLevel so a linked list can be walked.
        def [](value)
          return super(value) unless value == :next
          BbiZoomLevel.new(super(:next_ptr))
        end
      end

      # An open binary file (BigWig/BigBed)
      class BbiFile < FFI::Struct
        layout :next,                :pointer,    # Next in list.
               :fileName,            :string,     # Name of file - for better error reporting.
               :udc,                 :pointer,    # Open UDC file handle.
               :typeSig,             :uint,       # bigBedSig or bigWigSig for now.
               :isSwapped,           :bool,       # If TRUE need to byte swap everything.
               :chromBpt,            :pointer,    # Index of chromosomes.
               :version,             :ushort,     # Version number - initially 1.
               :zoomLevels,          :ushort,     # Number of zoom levels.
               :chromTreeOffset,     :ulong_long, # Offset to chromosome index.
               :unzoomedDataOffset,  :ulong_long, # Start of unzoomed data.
               :unzoomedIndexOffset, :ulong_long, # Start of unzoomed index.
               :fieldCount,          :ushort,     # Number of columns in bed version.
               :definedFieldCount,   :ushort,     # Number of columns using bed standard definitions.
               :asOffset,            :ulong_long, # Offset to embedded null-terminated AutoSQL file.
               :totalSummaryOffset,  :ulong_long, # Offset to total summary information if any. (On older files have to calculate)
               :uncompressBufSize,   :uint,       # Size of uncompression buffer, 0 if uncompressed
               :unzoomedCir,         :pointer,    # Unzoomed data index in memory - may be NULL.
               :levelList,           :pointer     # List of zoom levels.
      end

      # A BbiFile summary element
      class BbiSummaryElement < FFI::Struct
        layout :validCount, :ulong_long,
               :minVal,     :double,
               :maxVal,     :double,
               :sumData,    :double,
               :sumSquares, :double
      end

      # Pair of a name and a 32-bit integer. Used to assign IDs to chromosomes.
      class BbiChromInfo < FFI::Struct
        layout :next_ptr, :pointer,
               :name,     :string, # Chromosome name
               :id,       :uint,   # Chromosome ID - a small number usually
               :size,     :uint    # Chromosome size in bases

        # Field reader; the pseudo field :next wraps the raw :next_ptr pointer
        # in another BbiChromInfo so a linked list can be walked.
        def [](value)
          return super(value) unless value == :next
          BbiChromInfo.new(super(:next_ptr))
        end
      end

      ## ENUMS

      # bbiSummaryType - way to summarize data
      BbiSummaryType = enum :bbiSumMean, 0,
                            :bbiSumMax,
                            :bbiSumMin,
                            :bbiSumCoverage,
                            :bbiSumStandardDeviation

      # FUNCTIONS
      # Trailing comments list the native arguments, then (after ';') the result.

      # bbi
      attach_function :bbiChromList, [:pointer], :pointer # *bbiFile ; BbiChromInfo*
      attach_function :bbiChromSize, [:pointer, :pointer], :int32 # *bbiFile, chrom ; size
      attach_function :bbiFileClose, [:pointer], :void # *bbiFile
      attach_function :bbiTotalSummary, [:pointer], BbiSummaryElement.by_value # bbiFile pointer ; summary struct by value
      attach_function :bbiSummaryTypeFromString, [:string], BbiSummaryType # summaryType
      # bigwig
      attach_function :bigWigFileCreate, [:string, :string, :int, :int, :bool, :bool, :string], :void # inName, chromSizes, blockSize, itemsPerSlot, clipDontDie, doCompress, outName
      attach_function :bigWigFileOpen, [:string], :pointer # filename
      attach_function :bigWigFileSmooth, [:string, :string, :int, :int, :bool, :int, :int, :string, :string, :double], :void # inName, chromSizes, blockSize, itemsPerSlot, doCompress, window, verbosity, outName, smoothType, Cutoff
      attach_function :bigWigSummaryArray, [:pointer, :pointer, :uint32, :uint32, BbiSummaryType, :int, :pointer], :bool # *bbiFile, chrom, start, end, type, size, &values
      attach_function :isBigWig, [:string], :bool # filename
      # utils
      # NOTE(review): six arguments are declared but only inName, chromSizes and
      # outName were ever documented - confirm the meaning of the middle three.
      attach_function :bedGraphToBigWig, [:string, :string, :int, :int, :bool, :string], :void # inName, chromSizes, ..., outName
      # udc
      attach_function :udcDefaultDir, [], :string
      attach_function :udcSetDefaultDir, [:string], :void # path
      # hmmstats
      attach_function :calcStdFromSums, [:double, :double, :uint64], :double # sum, sumSquares, n
      attach_function :slCount, [:pointer], :int # *bbiChromList
    end
  end
end
@@ -0,0 +1,38 @@
# == library.rb
# This file contains the Library Class for retrieving platform specific library names
#
# == Contact
#
# Author::    Nicholas A. Thrower
# Copyright:: Copyright (c) 2012 Nicholas A Thrower
# License::   See LICENSE.txt for more details
#

require 'rbconfig'

# :nodoc: all
module Bio
  # -
  module Ucsc
    # Cross-platform library naming
    class Library # :nodoc:
      # return the platform specific library name
      #
      # The operating system is taken from RbConfig::CONFIG['host_os'] rather
      # than from RUBY_PLATFORM / RUBY_DESCRIPTION, so the result does not
      # depend on the interpreter's banner format.
      #
      # Returns the absolute path of src/libucsc.so.1 (linux) or
      # src/libucsc.1.dylib (darwin) next to this file.
      # Raises NotImplementedError on any other operating system.
      def self.filename
        lib_os = case RbConfig::CONFIG['host_os']
                 when /linux/  then 'so.1'
                 when /darwin/ then '1.dylib'
                 else raise NotImplementedError, "UcscUtil not supported on your platform"
                 end

        File.join(File.expand_path(File.dirname(__FILE__)), "src/libucsc.#{lib_os}")
      end
    end
  end
end