absee 0.0.2.3 → 0.1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/absee.rb +276 -272
  2. metadata +5 -5
@@ -1,309 +1,313 @@
1
1
  # absee
2
- #
3
- # Jenny Cheng
2
+ #
3
+ # Jenny Cheng
4
4
  # jencheng@ginkgobioworks.com
5
- #
5
+ #
6
6
  # based off of Abi.cs by Ronaldo Rodrigues Ferreira
7
- #
7
+ #
8
8
  # extracts the data from ABIF files
9
9
  #
10
10
  # MIT license 2012
11
11
 
12
- #opens the ABIF sequencing / chromatogram file
13
- #checks for ABIF file type
14
- #major ABIF versions greater than 1 are not supported
15
- #
16
- #== Parameters:
17
- #filename::
18
- # a string containing the filename (including the path and extensions)
19
- #
20
- #== Returns:
21
- # Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
22
- def readAB(filename)
23
- #opens ab1 as a File object
24
- abFile = open(filename)
25
- byteArray = ""
26
- #// here we read the first four bytes. It is important
27
- #// to remember that we do not seek back the file, just
28
- #// because it is not necessary to do this.
29
- abFile.seek(0, IO::SEEK_SET)
30
- abFile.read(4, byteArray)
31
- #ABIF file indicator
32
- if byteArray == "ABIF"
33
- return processAB(abFile)
34
- else
35
- return [],[],[],[],[],[]
12
+ module Absee
13
+
14
+ #opens the ABIF sequencing / chromatogram file
15
+ #checks for ABIF file type
16
+ #major ABIF versions greater than 1 are not supported
17
+ #
18
+ #== Parameters:
19
+ #filename::
20
+ # a string containing the filename (including the path and extensions)
21
+ #
22
+ #== Returns:
23
+ # Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
24
+ def self.readAB(filename)
25
+ #opens ab1 as a File object
26
+ abFile = open(filename)
27
+ byteArray = ""
28
+ #// here we read the first four bytes. It is important
29
+ #// to remember that we do not seek back the file, just
30
+ #// because it is not necessary to do this.
31
+ abFile.seek(0, IO::SEEK_SET)
32
+ abFile.read(4, byteArray)
33
+ #ABIF file indicator
34
+ if byteArray == "ABIF"
35
+ return processAB(abFile)
36
+ else
37
+ return [],[],[],[],[],[]
38
+ end
36
39
  end
37
- end
38
40
 
39
- #process the opened ABIF filestream, and calls subsequent methods to extract the data
40
- #
41
- #== Parameters:
42
- #filestream:: an opened File
43
- #
44
- #== Returns:
45
- #Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
46
- #readAB returns the results of this method
47
- def processAB(filestream)
48
- #// here, we can read the ABIF header information
49
- version = readUnsignedByte_2(4, filestream)
50
- #// major versions greater than 1 are not supported
51
- #// Applied Biosystems rules
52
- if (version / 100 > 1)
53
- return [], [], [], [], [], []
41
+ #process the opened ABIF filestream, and calls subsequent methods to extract the data
42
+ #
43
+ #== Parameters:
44
+ #filestream:: an opened File
45
+ #
46
+ #== Returns:
47
+ #Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
48
+ #readAB returns the results of this method
49
+ def self.processAB(filestream)
50
+ #// here, we can read the ABIF header information
51
+ version = readUnsignedByte_2(4, filestream)
52
+ #// major versions greater than 1 are not supported
53
+ #// Applied Biosystems rules
54
+ if (version / 100 > 1)
55
+ return [], [], [], [], [], []
56
+ end
57
+ #// we just read ABIF, so we don't need more information than that
58
+ numElements = readUnsignedByte_4(18, filestream)
59
+ dataOffset = readUnsignedByte_4(26, filestream)
60
+ directory = readDirectoryEntry(filestream, dataOffset, numElements)
61
+ numSamples, numBases = gatherInformation(directory, numElements)
62
+ samples_a, samples_c, samples_g, samples_t = getSamples(filestream, directory, numElements, numSamples)
63
+ called_sequence = getCalledSequence(filestream, directory, numElements, numBases)
64
+ peakIndexes = getPeakIndexes(filestream, directory, numElements, numBases)
65
+ return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
54
66
  end
55
- #// we just read ABIF, so we don't need more information than that
56
- numElements = readUnsignedByte_4(18, filestream)
57
- dataOffset = readUnsignedByte_4(26, filestream)
58
- directory = readDirectoryEntry(filestream, dataOffset, numElements)
59
- numSamples, numBases = gatherInformation(directory, numElements)
60
- samples_a, samples_c, samples_g, samples_t = getSamples(filestream, directory, numElements, numSamples)
61
- called_sequence = getCalledSequence(filestream, directory, numElements, numBases)
62
- peakIndexes = getPeakIndexes(filestream, directory, numElements, numBases)
63
- return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
64
- end
65
67
 
66
- #reads 2 unsigned bytes and orders by most significant byte first
67
- #
68
- #== Parameters:
69
- #offset:: how many bytes to offset for the read
70
- #filestream:: an opened File
71
- #
72
- #== Returns:
73
- #an int ordered by most significant byte first
74
- def readUnsignedByte_2(offset, filestream)
75
- #// most significant byte first
76
- #// |byte0|byte1| <= |unsigned int|
77
- byteArray = ""
78
- filestream.seek(offset, IO::SEEK_SET)
79
- byteArray = filestream.read(2, byteArray)
80
- return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
81
- end
68
+ #reads 2 unsigned bytes and orders by most significant byte first
69
+ #
70
+ #== Parameters:
71
+ #offset:: how many bytes to offset for the read
72
+ #filestream:: an opened File
73
+ #
74
+ #== Returns:
75
+ #an int ordered by most significant byte first
76
+ def self.readUnsignedByte_2(offset, filestream)
77
+ #// most significant byte first
78
+ #// |byte0|byte1| <= |unsigned int|
79
+ byteArray = ""
80
+ filestream.seek(offset, IO::SEEK_SET)
81
+ byteArray = filestream.read(2, byteArray)
82
+ return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
83
+ end
82
84
 
83
- #reads 4 unsigned bytes and orders by most significant byte first
84
- #
85
- #== Parameters:
86
- #offset:: how many bytes to offset for the read
87
- #filestream:: an opened File
88
- #
89
- #== Returns:
90
- #an int ordered by most significant byte first
91
- def readUnsignedByte_4(offset, filestream)
92
- byteArray = ""
93
- filestream.seek(offset, IO::SEEK_SET)
94
- byteArray = filestream.read(4, byteArray)
95
- #// most significant byte first
96
- #// |byte0|byte1|byte2|byte3| <= |unsigned int|
97
- return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
98
- end
85
+ #reads 4 unsigned bytes and orders by most significant byte first
86
+ #
87
+ #== Parameters:
88
+ #offset:: how many bytes to offset for the read
89
+ #filestream:: an opened File
90
+ #
91
+ #== Returns:
92
+ #an int ordered by most significant byte first
93
+ def self.readUnsignedByte_4(offset, filestream)
94
+ byteArray = ""
95
+ filestream.seek(offset, IO::SEEK_SET)
96
+ byteArray = filestream.read(4, byteArray)
97
+ #// most significant byte first
98
+ #// |byte0|byte1|byte2|byte3| <= |unsigned int|
99
+ return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
100
+ end
99
101
 
100
- #reads the data from the directory
101
- #
102
- #== Parameters:
103
- #dataOffset:: how many bytes to offset
104
- #numElements:: number of elements in the file computed by gatherInformation
105
- #filestream:: an opened File
106
- #
107
- #== Returns:
108
- #an array of arrays, each with information from the directory
109
- #[name, tag number, element type, element size, number of elements, data size, data offset]
110
- def readDirectoryEntry(filestream, dataOffset, numElements)
111
- filestream.seek(dataOffset, IO::SEEK_SET)
112
- byteArray = ""
113
- filestream.read(28*numElements, byteArray)
114
- directory = []
115
- pos = -1
116
-
117
-
118
- #directory structure
102
+ #reads the data from the directory
103
+ #
104
+ #== Parameters:
105
+ #dataOffset:: how many bytes to offset
106
+ #numElements:: number of directory entries, as read from the ABIF header by processAB
107
+ #filestream:: an opened File
108
+ #
109
+ #== Returns:
110
+ #an array of arrays, each with information from the directory
119
111
  #[name, tag number, element type, element size, number of elements, data size, data offset]
120
- (0..(numElements-1)).each do |i|
121
- directory[i] = []
122
- #// name
123
- name = ""
124
- name << byteArray.getbyte(pos+=1).chr
125
- name << byteArray.getbyte(pos+=1).chr
126
- name << byteArray.getbyte(pos+=1).chr
127
- name << byteArray.getbyte(pos+=1).chr
128
- directory[i] << name
129
- #// tag number
130
- tag_number = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
131
- directory[i] << tag_number
132
- #// element type
133
- element_type = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
134
- directory[i] << element_type
135
- #// element size
136
- element_size = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
137
- directory[i] << element_size
138
- #// number of elements
139
- number_of_elements = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
140
- directory[i] << number_of_elements
141
- #// data size
142
- data_size = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
143
- directory[i] << data_size
144
- #// data offset
145
- data_offset = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
146
- directory[i] << data_offset
147
- #// we do not save the dataHandle field
148
- pos += 4;
149
- end
150
- return directory
151
- end
112
+ def self.readDirectoryEntry(filestream, dataOffset, numElements)
113
+ filestream.seek(dataOffset, IO::SEEK_SET)
114
+ byteArray = ""
115
+ filestream.read(28*numElements, byteArray)
116
+ directory = []
117
+ pos = -1
152
118
 
153
119
 
154
- #directory structure
155
- #[name, tag number, element type, element size, number of elements, data size, data offset]
156
- #this is for easier index into the each directory array
157
- #
158
- #== Parameters:
159
- #array:: an array with information from the directory
160
- #element:: a string with type of information from the directory to retrieve: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset
161
- #
162
- #== Returns:
163
- #the element from the array
164
- def get(array, element)
165
- if element == "name"
166
- return array[0]
167
- elsif element == "tag_number"
168
- return array[1]
169
- elsif element == "element_type"
170
- return array[2]
171
- elsif element == "element_size"
172
- return array[3]
173
- elsif element == "number_of_elements"
174
- return array[4]
175
- elsif element == "data_size"
176
- return array[5]
177
- elsif element == "data_offset"
178
- return array[6]
179
- else
180
- return array[0]
120
+ #directory structure
121
+ #[name, tag number, element type, element size, number of elements, data size, data offset]
122
+ (0..(numElements-1)).each do |i|
123
+ directory[i] = []
124
+ #// name
125
+ name = ""
126
+ name << byteArray.getbyte(pos+=1).chr
127
+ name << byteArray.getbyte(pos+=1).chr
128
+ name << byteArray.getbyte(pos+=1).chr
129
+ name << byteArray.getbyte(pos+=1).chr
130
+ directory[i] << name
131
+ #// tag number
132
+ tag_number = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
133
+ directory[i] << tag_number
134
+ #// element type
135
+ element_type = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
136
+ directory[i] << element_type
137
+ #// element size
138
+ element_size = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
139
+ directory[i] << element_size
140
+ #// number of elements
141
+ number_of_elements = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
142
+ directory[i] << number_of_elements
143
+ #// data size
144
+ data_size = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
145
+ directory[i] << data_size
146
+ #// data offset
147
+ data_offset = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
148
+ directory[i] << data_offset
149
+ #// we do not save the dataHandle field
150
+ pos += 4;
151
+ end
152
+ return directory
181
153
  end
182
- end
183
154
 
184
- #counts the number of samples and number of bases contained in this ABIF file
185
- #
186
- #== Parameters:
187
- #directory:: an array of array generated from readDirectoryEntry
188
- #numElements:: an int indicating the number of elements in this ABIF file
189
- #
190
- #== Returns:
191
- #number of samples and number of bases contained in this ABIF file
192
- def gatherInformation(directory, numElements)
193
- numSamples = 0
194
- numBases = 0
195
-
196
- (0..(numElements-1)).each do |i|
197
- if (get(directory[i],"name") == "DATA") && (get(directory[i], "tag_number") == 9)
198
- numSamples = get(directory[i], "number_of_elements") #number of elements
155
+
156
+ #directory structure
157
+ #[name, tag number, element type, element size, number of elements, data size, data offset]
158
+ #this makes it easier to index into each directory array
159
+ #
160
+ #== Parameters:
161
+ #array:: an array with information from the directory
162
+ #element:: a string naming the field to retrieve from the directory entry: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset]
163
+ #
164
+ #== Returns:
165
+ #the element from the array
166
+ def self.get(array, element)
167
+ if element == "name"
168
+ return array[0]
169
+ elsif element == "tag_number"
170
+ return array[1]
171
+ elsif element == "element_type"
172
+ return array[2]
173
+ elsif element == "element_size"
174
+ return array[3]
175
+ elsif element == "number_of_elements"
176
+ return array[4]
177
+ elsif element == "data_size"
178
+ return array[5]
179
+ elsif element == "data_offset"
180
+ return array[6]
199
181
  else
200
- if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
201
- numBases = get(directory[i], "number_of_elements") #number of elements
202
- end
182
+ return array[0]
203
183
  end
204
184
  end
205
-
206
- return numSamples, numBases
207
- end
208
185
 
209
- #extracts the trace information for the bases
210
- #
211
- #== Parameters:
212
- #filestream:: an open File
213
- #directory:: an array of array generated by readDirectoryEntry
214
- #numElements:: an int indicating the number of elements in this ABIF file
215
- #numSamples:: an int calculated by gatherInformation
216
- #
217
- #== Returns:
218
- #four arrays with trace data in the order ACGT
219
- def getSamples(filestream, directory, numElements, numSamples)
220
- samples_a = []
221
- samples_c = []
222
- samples_g = []
223
- samples_t = []
186
+ #counts the number of samples and number of bases contained in this ABIF file
187
+ #
188
+ #== Parameters:
189
+ #directory:: an array of arrays generated by readDirectoryEntry
190
+ #numElements:: an int indicating the number of elements in this ABIF file
191
+ #
192
+ #== Returns:
193
+ #number of samples and number of bases contained in this ABIF file
194
+ def self.gatherInformation(directory, numElements)
195
+ numSamples = 0
196
+ numBases = 0
224
197
 
225
- #// we guess the order being GATC, as Ferreira and Staden does
226
- (0..numElements-1).each do |i|
227
- tag_number = get(directory[i], "tag_number")
228
- if (get(directory[i],"name") == "DATA") && ([9,10,11,12].include? tag_number)
229
- byteArray_samples = ""
230
- filestream.seek(get(directory[i],"data_offset"), IO::SEEK_SET)
231
- filestream.read(get(directory[i], "number_of_elements")*2, byteArray_samples)
232
- pos = -1
233
- if tag_number == 9 #G
234
- (0..numSamples-1).each do |j|
235
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
236
- samples_g[j] = value
237
- end
238
- elsif tag_number == 10 #A
239
- (0..numSamples-1).each do |j|
240
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
241
- samples_a[j] = value
198
+ (0..(numElements-1)).each do |i|
199
+ if (get(directory[i],"name") == "DATA") && (get(directory[i], "tag_number") == 9)
200
+ numSamples = get(directory[i], "number_of_elements") #number of elements
201
+ else
202
+ if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
203
+ numBases = get(directory[i], "number_of_elements") #number of elements
242
204
  end
243
- elsif tag_number == 11 #T
244
- (0..numSamples-1).each do |j|
245
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
246
- samples_t[j] = value
247
- end
248
- else #C
249
- (0..numSamples-1).each do |j|
250
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
251
- samples_c[j] = value
205
+ end
206
+ end
207
+
208
+ return numSamples, numBases
209
+ end
210
+
211
+ #extracts the trace information for the bases
212
+ #
213
+ #== Parameters:
214
+ #filestream:: an open File
215
+ #directory:: an array of arrays generated by readDirectoryEntry
216
+ #numElements:: an int indicating the number of elements in this ABIF file
217
+ #numSamples:: an int calculated by gatherInformation
218
+ #
219
+ #== Returns:
220
+ #four arrays with trace data in the order ACGT
221
+ def self.getSamples(filestream, directory, numElements, numSamples)
222
+ samples_a = []
223
+ samples_c = []
224
+ samples_g = []
225
+ samples_t = []
226
+
227
+ #// we assume the order is GATC, as Ferreira and Staden do
228
+ (0..numElements-1).each do |i|
229
+ tag_number = get(directory[i], "tag_number")
230
+ if (get(directory[i],"name") == "DATA") && ([9,10,11,12].include? tag_number)
231
+ byteArray_samples = ""
232
+ filestream.seek(get(directory[i],"data_offset"), IO::SEEK_SET)
233
+ filestream.read(get(directory[i], "number_of_elements")*2, byteArray_samples)
234
+ pos = -1
235
+ if tag_number == 9 #G
236
+ (0..numSamples-1).each do |j|
237
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
238
+ samples_g[j] = value
239
+ end
240
+ elsif tag_number == 10 #A
241
+ (0..numSamples-1).each do |j|
242
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
243
+ samples_a[j] = value
244
+ end
245
+ elsif tag_number == 11 #T
246
+ (0..numSamples-1).each do |j|
247
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
248
+ samples_t[j] = value
249
+ end
250
+ else #C
251
+ (0..numSamples-1).each do |j|
252
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
253
+ samples_c[j] = value
254
+ end
252
255
  end
253
256
  end
254
257
  end
258
+ return samples_a, samples_c, samples_g, samples_t
255
259
  end
256
- return samples_a, samples_c, samples_g, samples_t
257
- end
258
260
 
259
- #extracts the called sequence information
260
- #
261
- #== Parameters:
262
- #filestream:: an open File
263
- #directory:: an array of array generated by readDirectoryEntry
264
- #numElements:: an int indicating the number of elements in this ABIF file
265
- #numBases:: an int calculated by gatherInformation
266
- #
267
- #== Returns:
268
- #an array with the called sequence
269
- def getCalledSequence(filestream, directory, numElements, numBases)
270
- calledSequence = []
271
- (0..numElements-1).each do |i|
272
- if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
273
- byteArray_seq = ""
274
- filestream.seek(get(directory[i], "data_offset"))
275
- filestream.read(numBases,byteArray_seq)
276
- (0..numBases-1).each do |j|
277
- calledSequence[j] = byteArray_seq.getbyte(j).chr
261
+ #extracts the called sequence information
262
+ #
263
+ #== Parameters:
264
+ #filestream:: an open File
265
+ #directory:: an array of arrays generated by readDirectoryEntry
266
+ #numElements:: an int indicating the number of elements in this ABIF file
267
+ #numBases:: an int calculated by gatherInformation
268
+ #
269
+ #== Returns:
270
+ #an array with the called sequence
271
+ def self.getCalledSequence(filestream, directory, numElements, numBases)
272
+ calledSequence = []
273
+ (0..numElements-1).each do |i|
274
+ if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
275
+ byteArray_seq = ""
276
+ filestream.seek(get(directory[i], "data_offset"))
277
+ filestream.read(numBases,byteArray_seq)
278
+ (0..numBases-1).each do |j|
279
+ calledSequence[j] = byteArray_seq.getbyte(j).chr
280
+ end
278
281
  end
279
282
  end
283
+ return calledSequence
280
284
  end
281
- return calledSequence
282
- end
283
285
 
284
- #extracts the trace information for the bases
285
- #
286
- #== Parameters:
287
- #filestream:: an open File
288
- #directory:: an array of array generated by readDirectoryEntry
289
- #numElements:: an int indicating the number of elements in this ABIF file
290
- #numBases:: an int calculated by gatherInformation
291
- #
292
- #== Returns:
293
- #an array with the indexes of the peaks
294
- def getPeakIndexes(filestream, directory, numElements, numBases)
295
- peakIndexes = []
296
- (0..numElements-1).each do |i|
297
- if (get(directory[i], "name") == "PLOC") && (get(directory[i], "tag_number") == 2)
298
- byteArray_peak = ""
299
- filestream.seek(get(directory[i], "data_offset"), IO::SEEK_SET)
300
- filestream.read(get(directory[i], "number_of_elements")*4, byteArray_peak)
301
- pos = -1
302
- (0..numBases-1).each do |j|
303
- peakIndex = byteArray_peak.getbyte(pos+=1) << 8 | byteArray_peak.getbyte(pos+=1)
304
- peakIndexes[j] = peakIndex
286
+ #extracts the peak index (PLOC) information for the called bases
287
+ #
288
+ #== Parameters:
289
+ #filestream:: an open File
290
+ #directory:: an array of array generated by readDirectoryEntry
291
+ #numElements:: an int indicating the number of elements in this ABIF file
292
+ #numBases:: an int calculated by gatherInformation
293
+ #
294
+ #== Returns:
295
+ #an array with the indexes of the peaks
296
+ def self.getPeakIndexes(filestream, directory, numElements, numBases)
297
+ peakIndexes = []
298
+ (0..numElements-1).each do |i|
299
+ if (get(directory[i], "name") == "PLOC") && (get(directory[i], "tag_number") == 2)
300
+ byteArray_peak = ""
301
+ filestream.seek(get(directory[i], "data_offset"), IO::SEEK_SET)
302
+ filestream.read(get(directory[i], "number_of_elements")*4, byteArray_peak)
303
+ pos = -1
304
+ (0..numBases-1).each do |j|
305
+ peakIndex = byteArray_peak.getbyte(pos+=1) << 8 | byteArray_peak.getbyte(pos+=1)
306
+ peakIndexes[j] = peakIndex
307
+ end
305
308
  end
306
309
  end
310
+ return peakIndexes
307
311
  end
308
- return peakIndexes
312
+
309
313
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: absee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2.3
4
+ version: 0.1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,10 +9,10 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-24 00:00:00.000000000 Z
12
+ date: 2012-11-14 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: reads ABIF sequencing / chromatogram files and extracts the peak indexes,
15
- called sequence, and ACGT values
14
+ description: .ab1 reader / ABIF reader; extracts the peak indexes, called sequence,
15
+ and ACGT values from sequencing files
16
16
  email: jencheng@ginkgobioworks.com
17
17
  executables: []
18
18
  extensions: []
@@ -43,6 +43,6 @@ rubyforge_project:
43
43
  rubygems_version: 1.8.23
44
44
  signing_key:
45
45
  specification_version: 3
46
- summary: reads .ab1 sequencing/chromatogram files
46
+ summary: .ab1 reader / ABIF reader
47
47
  test_files: []
48
48
  has_rdoc: