absee 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/absee.rb +89 -2
- metadata +1 -1
data/lib/absee.rb
CHANGED
@@ -9,6 +9,15 @@
|
|
9
9
|
#
|
10
10
|
# MIT license 2012
|
11
11
|
|
12
|
+
#opens the ABIF sequencing / chromatogram file
|
13
|
+
#checks for ABIF file type
|
14
|
+
#major ABIF versions greater than 1 are not supported
|
15
|
+
#
|
16
|
+
#== Parameters:
|
17
|
+
#filename:: a string containing the filename (including the path and extensions)
|
18
|
+
#
|
19
|
+
#== Returns:
|
20
|
+
#Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
|
12
21
|
def readAB(filename)
|
13
22
|
#opens ab1 as a File object
|
14
23
|
abFile = open(filename)
|
@@ -26,6 +35,14 @@ def readAB(filename)
|
|
26
35
|
end
|
27
36
|
end
|
28
37
|
|
38
|
+
#processes the opened ABIF filestream and calls subsequent methods to extract the data
|
39
|
+
#
|
40
|
+
#== Parameters:
|
41
|
+
#filestream:: an opened File
|
42
|
+
#
|
43
|
+
#== Returns:
|
44
|
+
#Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
|
45
|
+
#readAB returns the results of this method
|
29
46
|
def processAB(filestream)
|
30
47
|
#// here, we can read the ABIF header information
|
31
48
|
version = readUnsignedByte_2(4, filestream)
|
@@ -45,6 +62,14 @@ def processAB(filestream)
|
|
45
62
|
return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
|
46
63
|
end
|
47
64
|
|
65
|
+
#reads 2 unsigned bytes and orders by most significant byte first
|
66
|
+
#
|
67
|
+
#== Parameters:
|
68
|
+
#offset:: how many bytes to offset for the read
|
69
|
+
#filestream:: an opened File
|
70
|
+
#
|
71
|
+
#== Returns:
|
72
|
+
#an int ordered by most significant byte first
|
48
73
|
def readUnsignedByte_2(offset, filestream)
|
49
74
|
#// most significant byte first
|
50
75
|
#// |byte0|byte1| <= |unsigned int|
|
@@ -54,6 +79,14 @@ def readUnsignedByte_2(offset, filestream)
|
|
54
79
|
return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
|
55
80
|
end
|
56
81
|
|
82
|
+
#reads 4 unsigned bytes and orders by most significant byte first
|
83
|
+
#
|
84
|
+
#== Parameters:
|
85
|
+
#offset:: how many bytes to offset for the read
|
86
|
+
#filestream:: an opened File
|
87
|
+
#
|
88
|
+
#== Returns:
|
89
|
+
#an int ordered by most significant byte first
|
57
90
|
def readUnsignedByte_4(offset, filestream)
|
58
91
|
byteArray = ""
|
59
92
|
filestream.seek(offset, IO::SEEK_SET)
|
@@ -63,6 +96,16 @@ def readUnsignedByte_4(offset, filestream)
|
|
63
96
|
return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
|
64
97
|
end
|
65
98
|
|
99
|
+
#reads the data from the directory
|
100
|
+
#
|
101
|
+
#== Parameters:
|
102
|
+
#dataOffset:: how many bytes to offset
|
103
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
104
|
+
#filestream:: an opened File
|
105
|
+
#
|
106
|
+
#== Returns:
|
107
|
+
#an array of arrays, each with information from the directory
|
108
|
+
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
66
109
|
def readDirectoryEntry(filestream, dataOffset, numElements)
|
67
110
|
filestream.seek(dataOffset, IO::SEEK_SET)
|
68
111
|
byteArray = ""
|
@@ -109,7 +152,14 @@ end
|
|
109
152
|
|
110
153
|
#directory structure
|
111
154
|
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
112
|
-
#this is for easier
|
155
|
+
#this is for easier indexing into each directory array
|
156
|
+
#
|
157
|
+
#== Parameters:
|
158
|
+
#array:: an array with information from the directory
|
159
|
+
#element:: a string naming the type of information to retrieve from the directory: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset]
|
160
|
+
#
|
161
|
+
#== Returns:
|
162
|
+
#the element from the array
|
113
163
|
def get(array, element)
|
114
164
|
if element == "name"
|
115
165
|
return array[0]
|
@@ -130,7 +180,14 @@ def get(array, element)
|
|
130
180
|
end
|
131
181
|
end
|
132
182
|
|
133
|
-
|
183
|
+
#counts the number of samples and number of bases contained in this ABIF file
|
184
|
+
#
|
185
|
+
#== Parameters:
|
186
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
187
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
188
|
+
#
|
189
|
+
#== Returns:
|
190
|
+
#number of samples and number of bases contained in this ABIF file
|
134
191
|
def gatherInformation(directory, numElements)
|
135
192
|
numSamples = 0
|
136
193
|
numBases = 0
|
@@ -148,6 +205,16 @@ def gatherInformation(directory, numElements)
|
|
148
205
|
return numSamples, numBases
|
149
206
|
end
|
150
207
|
|
208
|
+
#extracts the trace information for the bases
|
209
|
+
#
|
210
|
+
#== Parameters:
|
211
|
+
#filestream:: an open File
|
212
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
213
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
214
|
+
#numSamples:: an int calculated by gatherInformation
|
215
|
+
#
|
216
|
+
#== Returns:
|
217
|
+
#four arrays with trace data in the order ACGT
|
151
218
|
def getSamples(filestream, directory, numElements, numSamples)
|
152
219
|
samples_a = []
|
153
220
|
samples_c = []
|
@@ -188,6 +255,16 @@ def getSamples(filestream, directory, numElements, numSamples)
|
|
188
255
|
return samples_a, samples_c, samples_g, samples_t
|
189
256
|
end
|
190
257
|
|
258
|
+
#extracts the called sequence information
|
259
|
+
#
|
260
|
+
#== Parameters:
|
261
|
+
#filestream:: an open File
|
262
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
263
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
264
|
+
#numBases:: an int calculated by gatherInformation
|
265
|
+
#
|
266
|
+
#== Returns:
|
267
|
+
#an array with the called sequence
|
191
268
|
def getCalledSequence(filestream, directory, numElements, numBases)
|
192
269
|
calledSequence = []
|
193
270
|
(0..numElements-1).each do |i|
|
@@ -203,6 +280,16 @@ def getCalledSequence(filestream, directory, numElements, numBases)
|
|
203
280
|
return calledSequence
|
204
281
|
end
|
205
282
|
|
283
|
+
#extracts the peak index information for the called bases
|
284
|
+
#
|
285
|
+
#== Parameters:
|
286
|
+
#filestream:: an open File
|
287
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
288
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
289
|
+
#numBases:: an int calculated by gatherInformation
|
290
|
+
#
|
291
|
+
#== Returns:
|
292
|
+
#an array with the indexes of the peaks
|
206
293
|
def getPeakIndexes(filestream, directory, numElements, numBases)
|
207
294
|
peakIndexes = []
|
208
295
|
(0..numElements-1).each do |i|
|