absee 0.0.2.3 → 0.1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/absee.rb +276 -272
- metadata +5 -5
data/lib/absee.rb
CHANGED
|
@@ -1,309 +1,313 @@
|
|
|
1
1
|
# absee
|
|
2
|
-
#
|
|
3
|
-
# Jenny Cheng
|
|
2
|
+
#
|
|
3
|
+
# Jenny Cheng
|
|
4
4
|
# jencheng@ginkgobioworks.com
|
|
5
|
-
#
|
|
5
|
+
#
|
|
6
6
|
# based off of Abi.cs by Ronaldo Rodrigues Ferreira
|
|
7
|
-
#
|
|
7
|
+
#
|
|
8
8
|
# extracts the data from ABIF files
|
|
9
9
|
#
|
|
10
10
|
# MIT license 2012
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
|
|
17
|
-
#
|
|
18
|
-
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
#
|
|
22
|
-
|
|
23
|
-
#
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
12
|
+
module Absee
|
|
13
|
+
|
|
14
|
+
#opens the ABIF sequencing / chromatogram file
|
|
15
|
+
#checks for ABIF file type
|
|
16
|
+
#major ABIF versions greater than 1 are not supported
|
|
17
|
+
#
|
|
18
|
+
#== Parameters:
|
|
19
|
+
#filename::
|
|
20
|
+
# a string containing the filename (including the path and extensions)
|
|
21
|
+
#
|
|
22
|
+
#== Returns:
|
|
23
|
+
# Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
|
|
24
|
+
def self.readAB(filename)
|
|
25
|
+
#opens ab1 as a File object
|
|
26
|
+
abFile = open(filename)
|
|
27
|
+
byteArray = ""
|
|
28
|
+
#// here we read the first four bytes. It is important
|
|
29
|
+
#// to remember that we do not seek back the file, just
|
|
30
|
+
#// because it is not necessary to do this.
|
|
31
|
+
abFile.seek(0, IO::SEEK_SET)
|
|
32
|
+
abFile.read(4, byteArray)
|
|
33
|
+
#ABIF file indicator
|
|
34
|
+
if byteArray == "ABIF"
|
|
35
|
+
return processAB(abFile)
|
|
36
|
+
else
|
|
37
|
+
return [],[],[],[],[],[]
|
|
38
|
+
end
|
|
36
39
|
end
|
|
37
|
-
end
|
|
38
40
|
|
|
39
|
-
#process the opened ABIF filestream, and calls subsequent methods to extract the data
|
|
40
|
-
#
|
|
41
|
-
#== Parameters:
|
|
42
|
-
#filestream:: an opened File
|
|
43
|
-
#
|
|
44
|
-
#== Returns:
|
|
45
|
-
#Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
|
|
46
|
-
#readAB returns the results of this method
|
|
47
|
-
def processAB(filestream)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
41
|
+
#processes the opened ABIF filestream and calls subsequent methods to extract the data
|
|
42
|
+
#
|
|
43
|
+
#== Parameters:
|
|
44
|
+
#filestream:: an opened File
|
|
45
|
+
#
|
|
46
|
+
#== Returns:
|
|
47
|
+
#Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
|
|
48
|
+
#readAB returns the results of this method
|
|
49
|
+
def self.processAB(filestream)
|
|
50
|
+
#// here, we can read the ABIF header information
|
|
51
|
+
version = readUnsignedByte_2(4, filestream)
|
|
52
|
+
#// major versions greater than 1 are not supported
|
|
53
|
+
#// Applied Biosystems rules
|
|
54
|
+
if (version / 100 > 1)
|
|
55
|
+
return [], [], [], [], [], []
|
|
56
|
+
end
|
|
57
|
+
#// we just read ABIF, so we don't need more information than that
|
|
58
|
+
numElements = readUnsignedByte_4(18, filestream)
|
|
59
|
+
dataOffset = readUnsignedByte_4(26, filestream)
|
|
60
|
+
directory = readDirectoryEntry(filestream, dataOffset, numElements)
|
|
61
|
+
numSamples, numBases = gatherInformation(directory, numElements)
|
|
62
|
+
samples_a, samples_c, samples_g, samples_t = getSamples(filestream, directory, numElements, numSamples)
|
|
63
|
+
called_sequence = getCalledSequence(filestream, directory, numElements, numBases)
|
|
64
|
+
peakIndexes = getPeakIndexes(filestream, directory, numElements, numBases)
|
|
65
|
+
return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
|
|
54
66
|
end
|
|
55
|
-
#// we just read ABIF, so we don't need more information than that
|
|
56
|
-
numElements = readUnsignedByte_4(18, filestream)
|
|
57
|
-
dataOffset = readUnsignedByte_4(26, filestream)
|
|
58
|
-
directory = readDirectoryEntry(filestream, dataOffset, numElements)
|
|
59
|
-
numSamples, numBases = gatherInformation(directory, numElements)
|
|
60
|
-
samples_a, samples_c, samples_g, samples_t = getSamples(filestream, directory, numElements, numSamples)
|
|
61
|
-
called_sequence = getCalledSequence(filestream, directory, numElements, numBases)
|
|
62
|
-
peakIndexes = getPeakIndexes(filestream, directory, numElements, numBases)
|
|
63
|
-
return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
|
|
64
|
-
end
|
|
65
67
|
|
|
66
|
-
#reads 2 unsigned bytes and orders by most significant byte first
|
|
67
|
-
#
|
|
68
|
-
#== Parameters:
|
|
69
|
-
#offset:: how many bytes to offset for the read
|
|
70
|
-
#filestream:: an opened File
|
|
71
|
-
#
|
|
72
|
-
#== Returns:
|
|
73
|
-
#an int ordered by most significant byte first
|
|
74
|
-
def readUnsignedByte_2(offset, filestream)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
end
|
|
68
|
+
#reads 2 unsigned bytes and orders by most significant byte first
|
|
69
|
+
#
|
|
70
|
+
#== Parameters:
|
|
71
|
+
#offset:: how many bytes to offset for the read
|
|
72
|
+
#filestream:: an opened File
|
|
73
|
+
#
|
|
74
|
+
#== Returns:
|
|
75
|
+
#an int ordered by most significant byte first
|
|
76
|
+
def self.readUnsignedByte_2(offset, filestream)
|
|
77
|
+
#// most significant byte first
|
|
78
|
+
#// |byte0|byte1| <= |unsigned int|
|
|
79
|
+
byteArray = ""
|
|
80
|
+
filestream.seek(offset, IO::SEEK_SET)
|
|
81
|
+
byteArray = filestream.read(2, byteArray)
|
|
82
|
+
return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
|
|
83
|
+
end
|
|
82
84
|
|
|
83
|
-
#reads 4 unsigned bytes and orders by most significant byte first
|
|
84
|
-
#
|
|
85
|
-
#== Parameters:
|
|
86
|
-
#offset:: how many bytes to offset for the read
|
|
87
|
-
#filestream:: an opened File
|
|
88
|
-
#
|
|
89
|
-
#== Returns:
|
|
90
|
-
#an int ordered by most significant byte first
|
|
91
|
-
def readUnsignedByte_4(offset, filestream)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
end
|
|
85
|
+
#reads 4 unsigned bytes and orders by most significant byte first
|
|
86
|
+
#
|
|
87
|
+
#== Parameters:
|
|
88
|
+
#offset:: how many bytes to offset for the read
|
|
89
|
+
#filestream:: an opened File
|
|
90
|
+
#
|
|
91
|
+
#== Returns:
|
|
92
|
+
#an int ordered by most significant byte first
|
|
93
|
+
def self.readUnsignedByte_4(offset, filestream)
|
|
94
|
+
byteArray = ""
|
|
95
|
+
filestream.seek(offset, IO::SEEK_SET)
|
|
96
|
+
byteArray = filestream.read(4, byteArray)
|
|
97
|
+
#// most significant byte first
|
|
98
|
+
#// |byte0|byte1|byte2|byte3| <= |unsigned int|
|
|
99
|
+
return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
|
|
100
|
+
end
|
|
99
101
|
|
|
100
|
-
#reads the data from the directory
|
|
101
|
-
#
|
|
102
|
-
#== Parameters:
|
|
103
|
-
#dataOffset:: how many bytes to offset
|
|
104
|
-
#numElements:: number of elements in the file computed by gatherInformation
|
|
105
|
-
#filestream:: an opened File
|
|
106
|
-
#
|
|
107
|
-
#== Returns:
|
|
108
|
-
#an array of arrays, each with information from the directory
|
|
109
|
-
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
|
110
|
-
def readDirectoryEntry(filestream, dataOffset, numElements)
|
|
111
|
-
filestream.seek(dataOffset, IO::SEEK_SET)
|
|
112
|
-
byteArray = ""
|
|
113
|
-
filestream.read(28*numElements, byteArray)
|
|
114
|
-
directory = []
|
|
115
|
-
pos = -1
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
#directory structure
|
|
102
|
+
#reads the data from the directory
|
|
103
|
+
#
|
|
104
|
+
#== Parameters:
|
|
105
|
+
#dataOffset:: how many bytes to offset
|
|
106
|
+
#numElements:: number of directory entries, read from the ABIF header by processAB
|
|
107
|
+
#filestream:: an opened File
|
|
108
|
+
#
|
|
109
|
+
#== Returns:
|
|
110
|
+
#an array of arrays, each with information from the directory
|
|
119
111
|
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
|
120
|
-
(
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
name << byteArray.getbyte(pos+=1).chr
|
|
127
|
-
name << byteArray.getbyte(pos+=1).chr
|
|
128
|
-
directory[i] << name
|
|
129
|
-
#// tag number
|
|
130
|
-
tag_number = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
131
|
-
directory[i] << tag_number
|
|
132
|
-
#// element type
|
|
133
|
-
element_type = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
134
|
-
directory[i] << element_type
|
|
135
|
-
#// element size
|
|
136
|
-
element_size = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
137
|
-
directory[i] << element_size
|
|
138
|
-
#// number of elements
|
|
139
|
-
number_of_elements = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
140
|
-
directory[i] << number_of_elements
|
|
141
|
-
#// data size
|
|
142
|
-
data_size = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
143
|
-
directory[i] << data_size
|
|
144
|
-
#// data offset
|
|
145
|
-
data_offset = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
146
|
-
directory[i] << data_offset
|
|
147
|
-
#// we do not save the dataHandle field
|
|
148
|
-
pos += 4;
|
|
149
|
-
end
|
|
150
|
-
return directory
|
|
151
|
-
end
|
|
112
|
+
def self.readDirectoryEntry(filestream, dataOffset, numElements)
|
|
113
|
+
filestream.seek(dataOffset, IO::SEEK_SET)
|
|
114
|
+
byteArray = ""
|
|
115
|
+
filestream.read(28*numElements, byteArray)
|
|
116
|
+
directory = []
|
|
117
|
+
pos = -1
|
|
152
118
|
|
|
153
119
|
|
|
154
|
-
#directory structure
|
|
155
|
-
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
120
|
+
#directory structure
|
|
121
|
+
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
|
122
|
+
(0..(numElements-1)).each do |i|
|
|
123
|
+
directory[i] = []
|
|
124
|
+
#// name
|
|
125
|
+
name = ""
|
|
126
|
+
name << byteArray.getbyte(pos+=1).chr
|
|
127
|
+
name << byteArray.getbyte(pos+=1).chr
|
|
128
|
+
name << byteArray.getbyte(pos+=1).chr
|
|
129
|
+
name << byteArray.getbyte(pos+=1).chr
|
|
130
|
+
directory[i] << name
|
|
131
|
+
#// tag number
|
|
132
|
+
tag_number = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
133
|
+
directory[i] << tag_number
|
|
134
|
+
#// element type
|
|
135
|
+
element_type = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
136
|
+
directory[i] << element_type
|
|
137
|
+
#// element size
|
|
138
|
+
element_size = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
139
|
+
directory[i] << element_size
|
|
140
|
+
#// number of elements
|
|
141
|
+
number_of_elements = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
142
|
+
directory[i] << number_of_elements
|
|
143
|
+
#// data size
|
|
144
|
+
data_size = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
145
|
+
directory[i] << data_size
|
|
146
|
+
#// data offset
|
|
147
|
+
data_offset = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
|
|
148
|
+
directory[i] << data_offset
|
|
149
|
+
#// we do not save the dataHandle field
|
|
150
|
+
pos += 4;
|
|
151
|
+
end
|
|
152
|
+
return directory
|
|
181
153
|
end
|
|
182
|
-
end
|
|
183
154
|
|
|
184
|
-
|
|
185
|
-
#
|
|
186
|
-
|
|
187
|
-
#
|
|
188
|
-
#
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
#
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
155
|
+
|
|
156
|
+
#directory structure
|
|
157
|
+
#[name, tag number, element type, element size, number of elements, data size, data offset]
|
|
158
|
+
#this makes it easier to index into each directory array
|
|
159
|
+
#
|
|
160
|
+
#== Parameters:
|
|
161
|
+
#array:: an array with information from the directory
|
|
162
|
+
#element:: a string naming the field to retrieve from the directory array: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset]
|
|
163
|
+
#
|
|
164
|
+
#== Returns:
|
|
165
|
+
#the element from the array
|
|
166
|
+
def self.get(array, element)
|
|
167
|
+
if element == "name"
|
|
168
|
+
return array[0]
|
|
169
|
+
elsif element == "tag_number"
|
|
170
|
+
return array[1]
|
|
171
|
+
elsif element == "element_type"
|
|
172
|
+
return array[2]
|
|
173
|
+
elsif element == "element_size"
|
|
174
|
+
return array[3]
|
|
175
|
+
elsif element == "number_of_elements"
|
|
176
|
+
return array[4]
|
|
177
|
+
elsif element == "data_size"
|
|
178
|
+
return array[5]
|
|
179
|
+
elsif element == "data_offset"
|
|
180
|
+
return array[6]
|
|
199
181
|
else
|
|
200
|
-
|
|
201
|
-
numBases = get(directory[i], "number_of_elements") #number of elements
|
|
202
|
-
end
|
|
182
|
+
return array[0]
|
|
203
183
|
end
|
|
204
184
|
end
|
|
205
|
-
|
|
206
|
-
return numSamples, numBases
|
|
207
|
-
end
|
|
208
185
|
|
|
209
|
-
#
|
|
210
|
-
#
|
|
211
|
-
#== Parameters:
|
|
212
|
-
#
|
|
213
|
-
#
|
|
214
|
-
#
|
|
215
|
-
|
|
216
|
-
#
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
samples_a = []
|
|
221
|
-
samples_c = []
|
|
222
|
-
samples_g = []
|
|
223
|
-
samples_t = []
|
|
186
|
+
#counts the number of samples and number of bases contained in this ABIF file
|
|
187
|
+
#
|
|
188
|
+
#== Parameters:
|
|
189
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
|
190
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
|
191
|
+
#
|
|
192
|
+
#== Returns:
|
|
193
|
+
#number of samples and number of bases contained in this ABIF file
|
|
194
|
+
def self.gatherInformation(directory, numElements)
|
|
195
|
+
numSamples = 0
|
|
196
|
+
numBases = 0
|
|
224
197
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
filestream.read(get(directory[i], "number_of_elements")*2, byteArray_samples)
|
|
232
|
-
pos = -1
|
|
233
|
-
if tag_number == 9 #G
|
|
234
|
-
(0..numSamples-1).each do |j|
|
|
235
|
-
value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
|
|
236
|
-
samples_g[j] = value
|
|
237
|
-
end
|
|
238
|
-
elsif tag_number == 10 #A
|
|
239
|
-
(0..numSamples-1).each do |j|
|
|
240
|
-
value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
|
|
241
|
-
samples_a[j] = value
|
|
198
|
+
(0..(numElements-1)).each do |i|
|
|
199
|
+
if (get(directory[i],"name") == "DATA") && (get(directory[i], "tag_number") == 9)
|
|
200
|
+
numSamples = get(directory[i], "number_of_elements") #number of elements
|
|
201
|
+
else
|
|
202
|
+
if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
|
|
203
|
+
numBases = get(directory[i], "number_of_elements") #number of elements
|
|
242
204
|
end
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
return numSamples, numBases
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
#extracts the trace information for the bases
|
|
212
|
+
#
|
|
213
|
+
#== Parameters:
|
|
214
|
+
#filestream:: an open File
|
|
215
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
|
216
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
|
217
|
+
#numSamples:: an int calculated by gatherInformation
|
|
218
|
+
#
|
|
219
|
+
#== Returns:
|
|
220
|
+
#four arrays with trace data in the order ACGT
|
|
221
|
+
def self.getSamples(filestream, directory, numElements, numSamples)
|
|
222
|
+
samples_a = []
|
|
223
|
+
samples_c = []
|
|
224
|
+
samples_g = []
|
|
225
|
+
samples_t = []
|
|
226
|
+
|
|
227
|
+
#// we assume the channel order is GATC, as Ferreira and Staden do
|
|
228
|
+
(0..numElements-1).each do |i|
|
|
229
|
+
tag_number = get(directory[i], "tag_number")
|
|
230
|
+
if (get(directory[i],"name") == "DATA") && ([9,10,11,12].include? tag_number)
|
|
231
|
+
byteArray_samples = ""
|
|
232
|
+
filestream.seek(get(directory[i],"data_offset"), IO::SEEK_SET)
|
|
233
|
+
filestream.read(get(directory[i], "number_of_elements")*2, byteArray_samples)
|
|
234
|
+
pos = -1
|
|
235
|
+
if tag_number == 9 #G
|
|
236
|
+
(0..numSamples-1).each do |j|
|
|
237
|
+
value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
|
|
238
|
+
samples_g[j] = value
|
|
239
|
+
end
|
|
240
|
+
elsif tag_number == 10 #A
|
|
241
|
+
(0..numSamples-1).each do |j|
|
|
242
|
+
value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
|
|
243
|
+
samples_a[j] = value
|
|
244
|
+
end
|
|
245
|
+
elsif tag_number == 11 #T
|
|
246
|
+
(0..numSamples-1).each do |j|
|
|
247
|
+
value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
|
|
248
|
+
samples_t[j] = value
|
|
249
|
+
end
|
|
250
|
+
else #C
|
|
251
|
+
(0..numSamples-1).each do |j|
|
|
252
|
+
value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
|
|
253
|
+
samples_c[j] = value
|
|
254
|
+
end
|
|
252
255
|
end
|
|
253
256
|
end
|
|
254
257
|
end
|
|
258
|
+
return samples_a, samples_c, samples_g, samples_t
|
|
255
259
|
end
|
|
256
|
-
return samples_a, samples_c, samples_g, samples_t
|
|
257
|
-
end
|
|
258
260
|
|
|
259
|
-
#extracts the called sequence information
|
|
260
|
-
#
|
|
261
|
-
#== Parameters:
|
|
262
|
-
#filestream:: an open File
|
|
263
|
-
#directory:: an array of array generated by readDirectoryEntry
|
|
264
|
-
#numElements:: an int indicating the number of elements in this ABIF file
|
|
265
|
-
#numBases:: an int calculated by gatherInformation
|
|
266
|
-
#
|
|
267
|
-
#== Returns:
|
|
268
|
-
#an array with the called sequence
|
|
269
|
-
def getCalledSequence(filestream, directory, numElements, numBases)
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
261
|
+
#extracts the called sequence information
|
|
262
|
+
#
|
|
263
|
+
#== Parameters:
|
|
264
|
+
#filestream:: an open File
|
|
265
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
|
266
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
|
267
|
+
#numBases:: an int calculated by gatherInformation
|
|
268
|
+
#
|
|
269
|
+
#== Returns:
|
|
270
|
+
#an array with the called sequence
|
|
271
|
+
def self.getCalledSequence(filestream, directory, numElements, numBases)
|
|
272
|
+
calledSequence = []
|
|
273
|
+
(0..numElements-1).each do |i|
|
|
274
|
+
if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
|
|
275
|
+
byteArray_seq = ""
|
|
276
|
+
filestream.seek(get(directory[i], "data_offset"))
|
|
277
|
+
filestream.read(numBases,byteArray_seq)
|
|
278
|
+
(0..numBases-1).each do |j|
|
|
279
|
+
calledSequence[j] = byteArray_seq.getbyte(j).chr
|
|
280
|
+
end
|
|
278
281
|
end
|
|
279
282
|
end
|
|
283
|
+
return calledSequence
|
|
280
284
|
end
|
|
281
|
-
return calledSequence
|
|
282
|
-
end
|
|
283
285
|
|
|
284
|
-
#extracts the trace information for the bases
|
|
285
|
-
#
|
|
286
|
-
#== Parameters:
|
|
287
|
-
#filestream:: an open File
|
|
288
|
-
#directory:: an array of array generated by readDirectoryEntry
|
|
289
|
-
#numElements:: an int indicating the number of elements in this ABIF file
|
|
290
|
-
#numBases:: an int calculated by gatherInformation
|
|
291
|
-
#
|
|
292
|
-
#== Returns:
|
|
293
|
-
#an array with the indexes of the peaks
|
|
294
|
-
def getPeakIndexes(filestream, directory, numElements, numBases)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
286
|
+
#extracts the peak index information for the called bases
|
|
287
|
+
#
|
|
288
|
+
#== Parameters:
|
|
289
|
+
#filestream:: an open File
|
|
290
|
+
#directory:: an array of arrays generated by readDirectoryEntry
|
|
291
|
+
#numElements:: an int indicating the number of elements in this ABIF file
|
|
292
|
+
#numBases:: an int calculated by gatherInformation
|
|
293
|
+
#
|
|
294
|
+
#== Returns:
|
|
295
|
+
#an array with the indexes of the peaks
|
|
296
|
+
def self.getPeakIndexes(filestream, directory, numElements, numBases)
|
|
297
|
+
peakIndexes = []
|
|
298
|
+
(0..numElements-1).each do |i|
|
|
299
|
+
if (get(directory[i], "name") == "PLOC") && (get(directory[i], "tag_number") == 2)
|
|
300
|
+
byteArray_peak = ""
|
|
301
|
+
filestream.seek(get(directory[i], "data_offset"), IO::SEEK_SET)
|
|
302
|
+
filestream.read(get(directory[i], "number_of_elements")*4, byteArray_peak)
|
|
303
|
+
pos = -1
|
|
304
|
+
(0..numBases-1).each do |j|
|
|
305
|
+
peakIndex = byteArray_peak.getbyte(pos+=1) << 8 | byteArray_peak.getbyte(pos+=1)
|
|
306
|
+
peakIndexes[j] = peakIndex
|
|
307
|
+
end
|
|
305
308
|
end
|
|
306
309
|
end
|
|
310
|
+
return peakIndexes
|
|
307
311
|
end
|
|
308
|
-
|
|
312
|
+
|
|
309
313
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: absee
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.2.3
|
|
4
|
+
version: 0.1.0.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,10 +9,10 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2012-
|
|
12
|
+
date: 2012-11-14 00:00:00.000000000 Z
|
|
13
13
|
dependencies: []
|
|
14
|
-
description:
|
|
15
|
-
|
|
14
|
+
description: .ab1 reader / ABIF reader; extracts the peak indexes, called sequence,
|
|
15
|
+
and ACGT values from sequencing files
|
|
16
16
|
email: jencheng@ginkgobioworks.com
|
|
17
17
|
executables: []
|
|
18
18
|
extensions: []
|
|
@@ -43,6 +43,6 @@ rubyforge_project:
|
|
|
43
43
|
rubygems_version: 1.8.23
|
|
44
44
|
signing_key:
|
|
45
45
|
specification_version: 3
|
|
46
|
-
summary:
|
|
46
|
+
summary: .ab1 reader / ABIF reader
|
|
47
47
|
test_files: []
|
|
48
48
|
has_rdoc:
|