absee 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/absee.rb +89 -2
- metadata +1 -1
data/lib/absee.rb
CHANGED
@@ -9,6 +9,15 @@
|
|
9
9
|
#
|
10
10
|
# MIT license 2012
|
11
11
|
|
12
|
+
#opens the ABIF sequencing / chromatogram file
|
13
|
+
#checks for ABIF file type
|
14
|
+
#major ABIF versions greater than 1 are not supported
|
15
|
+
#
|
16
|
+
#== Parameters:
|
17
|
+
#filename:: a string containing the filename (including the path and extension)
|
18
|
+
#
|
19
|
+
#== Returns:
|
20
|
+
#Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
|
12
21
|
def readAB(filename)
|
13
22
|
#opens ab1 as a File object
|
14
23
|
abFile = open(filename)
|
@@ -26,6 +35,14 @@ def readAB(filename)
|
|
26
35
|
end
|
27
36
|
end
|
28
37
|
|
38
|
+
#processes the opened ABIF filestream and calls subsequent methods to extract the data
|
39
|
+
#
|
40
|
+
#== Parameters:
|
41
|
+
#filestream:: an opened File
|
42
|
+
#
|
43
|
+
#== Returns:
|
44
|
+
#Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
|
45
|
+
#readAB returns the results of this method
|
29
46
|
def processAB(filestream)
|
30
47
|
#// here, we can read the ABIF header information
|
31
48
|
version = readUnsignedByte_2(4, filestream)
|
@@ -45,6 +62,14 @@ def processAB(filestream)
|
|
45
62
|
return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
|
46
63
|
end
|
47
64
|
|
65
|
+
#reads 2 unsigned bytes and orders by most significant byte first
|
66
|
+
#
|
67
|
+
#== Parameters:
|
68
|
+
#offset:: how many bytes to offset for the read
|
69
|
+
#filestream:: an opened File
|
70
|
+
#
|
71
|
+
#== Returns:
|
72
|
+
#an int ordered by most significant byte first
|
48
73
|
def readUnsignedByte_2(offset, filestream)
|
49
74
|
#// most significant byte first
|
50
75
|
#// |byte0|byte1| <= |unsigned int|
|
@@ -54,6 +79,14 @@ def readUnsignedByte_2(offset, filestream)
|
|
54
79
|
return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
|
55
80
|
end
|
56
81
|
|
82
|
+
#reads 4 unsigned bytes and orders by most significant byte first
|
83
|
+
#
|
84
|
+
#== Parameters:
|
85
|
+
#offset:: how many bytes to offset for the read
|
86
|
+
#filestream:: an opened File
|
87
|
+
#
|
88
|
+
#== Returns:
|
89
|
+
#an int ordered by most significant byte first
|
57
90
|
def readUnsignedByte_4(offset, filestream)
|
58
91
|
byteArray = ""
|
59
92
|
filestream.seek(offset, IO::SEEK_SET)
|
@@ -63,6 +96,16 @@ def readUnsignedByte_4(offset, filestream)
|
|
63
96
|
return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
|
64
97
|
end
|
65
98
|
|
99
|
+
#reads the data from the directory
|
100
|
+
#
|
101
|
+
#== Parameters:
|
102
|
+
#dataOffset:: how many bytes to offset
|
103
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
104
|
+
#filestream:: an opened File
|
105
|
+
#
|
106
|
+
#== Returns:
|
107
|
+
#an array of arrays, each with information from the directory
|
108
|
+
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
66
109
|
def readDirectoryEntry(filestream, dataOffset, numElements)
|
67
110
|
filestream.seek(dataOffset, IO::SEEK_SET)
|
68
111
|
byteArray = ""
|
@@ -109,7 +152,14 @@ end
|
|
109
152
|
|
110
153
|
#directory structure
|
111
154
|
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
112
|
-
#this is for easier
|
155
|
+
#this is for easier indexing into each directory array
|
156
|
+
#
|
157
|
+
#== Parameters:
|
158
|
+
#array:: an array with information from the directory
|
159
|
+
#element:: a string with the type of information from the directory to retrieve: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset]
|
160
|
+
#
|
161
|
+
#== Returns:
|
162
|
+
#the element from the array
|
113
163
|
def get(array, element)
|
114
164
|
if element == "name"
|
115
165
|
return array[0]
|
@@ -130,7 +180,14 @@ def get(array, element)
|
|
130
180
|
end
|
131
181
|
end
|
132
182
|
|
133
|
-
|
183
|
+
#counts the number of samples and number of bases contained in this ABIF file
|
184
|
+
#
|
185
|
+
#== Parameters:
|
186
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
187
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
188
|
+
#
|
189
|
+
#== Returns:
|
190
|
+
#number of samples and number of bases contained in this ABIF file
|
134
191
|
def gatherInformation(directory, numElements)
|
135
192
|
numSamples = 0
|
136
193
|
numBases = 0
|
@@ -148,6 +205,16 @@ def gatherInformation(directory, numElements)
|
|
148
205
|
return numSamples, numBases
|
149
206
|
end
|
150
207
|
|
208
|
+
#extracts the trace information for the bases
|
209
|
+
#
|
210
|
+
#== Parameters:
|
211
|
+
#filestream:: an open File
|
212
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
213
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
214
|
+
#numSamples:: an int calculated by gatherInformation
|
215
|
+
#
|
216
|
+
#== Returns:
|
217
|
+
#four arrays with trace data in the order ACGT
|
151
218
|
def getSamples(filestream, directory, numElements, numSamples)
|
152
219
|
samples_a = []
|
153
220
|
samples_c = []
|
@@ -188,6 +255,16 @@ def getSamples(filestream, directory, numElements, numSamples)
|
|
188
255
|
return samples_a, samples_c, samples_g, samples_t
|
189
256
|
end
|
190
257
|
|
258
|
+
#extracts the called sequence information
|
259
|
+
#
|
260
|
+
#== Parameters:
|
261
|
+
#filestream:: an open File
|
262
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
263
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
264
|
+
#numBases:: an int calculated by gatherInformation
|
265
|
+
#
|
266
|
+
#== Returns:
|
267
|
+
#an array with the called sequence
|
191
268
|
def getCalledSequence(filestream, directory, numElements, numBases)
|
192
269
|
calledSequence = []
|
193
270
|
(0..numElements-1).each do |i|
|
@@ -203,6 +280,16 @@ def getCalledSequence(filestream, directory, numElements, numBases)
|
|
203
280
|
return calledSequence
|
204
281
|
end
|
205
282
|
|
283
|
+
#extracts the peak index information for the called bases
|
284
|
+
#
|
285
|
+
#== Parameters:
|
286
|
+
#filestream:: an open File
|
287
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
288
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
289
|
+
#numBases:: an int calculated by gatherInformation
|
290
|
+
#
|
291
|
+
#== Returns:
|
292
|
+
#an array with the indexes of the peaks
|
206
293
|
def getPeakIndexes(filestream, directory, numElements, numBases)
|
207
294
|
peakIndexes = []
|
208
295
|
(0..numElements-1).each do |i|
|