absee 0.0.2.3 → 0.1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/absee.rb +276 -272
  2. metadata +5 -5
@@ -1,309 +1,313 @@
1
1
  # absee
2
- #
3
- # Jenny Cheng
2
+ #
3
+ # Jenny Cheng
4
4
  # jencheng@ginkgobioworks.com
5
- #
5
+ #
6
6
  # based off of Abi.cs by Ronaldo Rodrigues Ferreira
7
- #
7
+ #
8
8
  # extracts the data from ABIF files
9
9
  #
10
10
  # MIT license 2012
11
11
 
12
- #opens the ABIF sequencing / chromatogram file
13
- #checks for ABIF file type
14
- #major ABIF versions greater than 1 are not supported
15
- #
16
- #== Parameters:
17
- #filename::
18
- # a string containing the filename (including the path and extensions)
19
- #
20
- #== Returns:
21
- # Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
22
- def readAB(filename)
23
- #opens ab1 as a File object
24
- abFile = open(filename)
25
- byteArray = ""
26
- #// here we read the first four bytes. It is important
27
- #// to remember that we do not seek back the file, just
28
- #// because it is not necessary to do this.
29
- abFile.seek(0, IO::SEEK_SET)
30
- abFile.read(4, byteArray)
31
- #ABIF file indicator
32
- if byteArray == "ABIF"
33
- return processAB(abFile)
34
- else
35
- return [],[],[],[],[],[]
12
+ module Absee
13
+
14
+ #opens the ABIF sequencing / chromatogram file
15
+ #checks for ABIF file type
16
+ #major ABIF versions greater than 1 are not supported
17
+ #
18
+ #== Parameters:
19
+ #filename::
20
+ # a string containing the filename (including the path and extensions)
21
+ #
22
+ #== Returns:
23
+ # Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
24
+ def self.readAB(filename)
25
+ #opens ab1 as a File object
26
+ abFile = open(filename)
27
+ byteArray = ""
28
+ #// here we read the first four bytes. It is important
29
+ #// to remember that we do not seek back the file, just
30
+ #// because it is not necessary to do this.
31
+ abFile.seek(0, IO::SEEK_SET)
32
+ abFile.read(4, byteArray)
33
+ #ABIF file indicator
34
+ if byteArray == "ABIF"
35
+ return processAB(abFile)
36
+ else
37
+ return [],[],[],[],[],[]
38
+ end
36
39
  end
37
- end
38
40
 
39
- #process the opened ABIF filestream, and calls subsequent methods to extract the data
40
- #
41
- #== Parameters:
42
- #filestream:: an opened File
43
- #
44
- #== Returns:
45
- #Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
46
- #readAB returns the results of this method
47
- def processAB(filestream)
48
- #// here, we can read the ABIF header information
49
- version = readUnsignedByte_2(4, filestream)
50
- #// major versions greater than 1 are not supported
51
- #// Applied Biosystems rules
52
- if (version / 100 > 1)
53
- return [], [], [], [], [], []
41
+ #process the opened ABIF filestream, and calls subsequent methods to extract the data
42
+ #
43
+ #== Parameters:
44
+ #filestream:: an opened File
45
+ #
46
+ #== Returns:
47
+ #Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
48
+ #readAB returns the results of this method
49
+ def self.processAB(filestream)
50
+ #// here, we can read the ABIF header information
51
+ version = readUnsignedByte_2(4, filestream)
52
+ #// major versions greater than 1 are not supported
53
+ #// Applied Biosystems rules
54
+ if (version / 100 > 1)
55
+ return [], [], [], [], [], []
56
+ end
57
+ #// we just read ABIF, so we don't need more information than that
58
+ numElements = readUnsignedByte_4(18, filestream)
59
+ dataOffset = readUnsignedByte_4(26, filestream)
60
+ directory = readDirectoryEntry(filestream, dataOffset, numElements)
61
+ numSamples, numBases = gatherInformation(directory, numElements)
62
+ samples_a, samples_c, samples_g, samples_t = getSamples(filestream, directory, numElements, numSamples)
63
+ called_sequence = getCalledSequence(filestream, directory, numElements, numBases)
64
+ peakIndexes = getPeakIndexes(filestream, directory, numElements, numBases)
65
+ return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
54
66
  end
55
- #// we just read ABIF, so we don't need more information than that
56
- numElements = readUnsignedByte_4(18, filestream)
57
- dataOffset = readUnsignedByte_4(26, filestream)
58
- directory = readDirectoryEntry(filestream, dataOffset, numElements)
59
- numSamples, numBases = gatherInformation(directory, numElements)
60
- samples_a, samples_c, samples_g, samples_t = getSamples(filestream, directory, numElements, numSamples)
61
- called_sequence = getCalledSequence(filestream, directory, numElements, numBases)
62
- peakIndexes = getPeakIndexes(filestream, directory, numElements, numBases)
63
- return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
64
- end
65
67
 
66
- #reads 2 unsigned bytes and orders by most significant byte first
67
- #
68
- #== Parameters:
69
- #offset:: how many bytes to offset for the read
70
- #filestream:: an opened File
71
- #
72
- #== Returns:
73
- #an int ordered by most significant byte first
74
- def readUnsignedByte_2(offset, filestream)
75
- #// most significant byte first
76
- #// |byte0|byte1| <= |unsigned int|
77
- byteArray = ""
78
- filestream.seek(offset, IO::SEEK_SET)
79
- byteArray = filestream.read(2, byteArray)
80
- return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
81
- end
68
+ #reads 2 unsigned bytes and orders by most significant byte first
69
+ #
70
+ #== Parameters:
71
+ #offset:: how many bytes to offset for the read
72
+ #filestream:: an opened File
73
+ #
74
+ #== Returns:
75
+ #an int ordered by most significant byte first
76
+ def self.readUnsignedByte_2(offset, filestream)
77
+ #// most significant byte first
78
+ #// |byte0|byte1| <= |unsigned int|
79
+ byteArray = ""
80
+ filestream.seek(offset, IO::SEEK_SET)
81
+ byteArray = filestream.read(2, byteArray)
82
+ return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
83
+ end
82
84
 
83
- #reads 4 unsigned bytes and orders by most significant byte first
84
- #
85
- #== Parameters:
86
- #offset:: how many bytes to offset for the read
87
- #filestream:: an opened File
88
- #
89
- #== Returns:
90
- #an int ordered by most significant byte first
91
- def readUnsignedByte_4(offset, filestream)
92
- byteArray = ""
93
- filestream.seek(offset, IO::SEEK_SET)
94
- byteArray = filestream.read(4, byteArray)
95
- #// most significant byte first
96
- #// |byte0|byte1|byte2|byte3| <= |unsigned int|
97
- return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
98
- end
85
+ #reads 4 unsigned bytes and orders by most significant byte first
86
+ #
87
+ #== Parameters:
88
+ #offset:: how many bytes to offset for the read
89
+ #filestream:: an opened File
90
+ #
91
+ #== Returns:
92
+ #an int ordered by most significant byte first
93
+ def self.readUnsignedByte_4(offset, filestream)
94
+ byteArray = ""
95
+ filestream.seek(offset, IO::SEEK_SET)
96
+ byteArray = filestream.read(4, byteArray)
97
+ #// most significant byte first
98
+ #// |byte0|byte1|byte2|byte3| <= |unsigned int|
99
+ return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
100
+ end
99
101
 
100
- #reads the data from the directory
101
- #
102
- #== Parameters:
103
- #dataOffset:: how many bytes to offset
104
- #numElements:: number of elements in the file computed by gatherInformation
105
- #filestream:: an opened File
106
- #
107
- #== Returns:
108
- #an array of arrays, each with information from the directory
109
- #[name, tag number, element type, element size, number of elements, data size, data offset]
110
- def readDirectoryEntry(filestream, dataOffset, numElements)
111
- filestream.seek(dataOffset, IO::SEEK_SET)
112
- byteArray = ""
113
- filestream.read(28*numElements, byteArray)
114
- directory = []
115
- pos = -1
116
-
117
-
118
- #directory structure
102
+ #reads the data from the directory
103
+ #
104
+ #== Parameters:
105
+ #dataOffset:: how many bytes to offset
106
+ #numElements:: number of elements in the file computed by gatherInformation
107
+ #filestream:: an opened File
108
+ #
109
+ #== Returns:
110
+ #an array of arrays, each with information from the directory
119
111
  #[name, tag number, element type, element size, number of elements, data size, data offset]
120
- (0..(numElements-1)).each do |i|
121
- directory[i] = []
122
- #// name
123
- name = ""
124
- name << byteArray.getbyte(pos+=1).chr
125
- name << byteArray.getbyte(pos+=1).chr
126
- name << byteArray.getbyte(pos+=1).chr
127
- name << byteArray.getbyte(pos+=1).chr
128
- directory[i] << name
129
- #// tag number
130
- tag_number = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
131
- directory[i] << tag_number
132
- #// element type
133
- element_type = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
134
- directory[i] << element_type
135
- #// element size
136
- element_size = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
137
- directory[i] << element_size
138
- #// number of elements
139
- number_of_elements = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
140
- directory[i] << number_of_elements
141
- #// data size
142
- data_size = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
143
- directory[i] << data_size
144
- #// data offset
145
- data_offset = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
146
- directory[i] << data_offset
147
- #// we do not save the dataHandle field
148
- pos += 4;
149
- end
150
- return directory
151
- end
112
+ def self.readDirectoryEntry(filestream, dataOffset, numElements)
113
+ filestream.seek(dataOffset, IO::SEEK_SET)
114
+ byteArray = ""
115
+ filestream.read(28*numElements, byteArray)
116
+ directory = []
117
+ pos = -1
152
118
 
153
119
 
154
- #directory structure
155
- #[name, tag number, element type, element size, number of elements, data size, data offset]
156
- #this is for easier index into the each directory array
157
- #
158
- #== Parameters:
159
- #array:: an array with information from the directory
160
- #element:: a string with type of information from the directory to retrieve: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset
161
- #
162
- #== Returns:
163
- #the element from the array
164
- def get(array, element)
165
- if element == "name"
166
- return array[0]
167
- elsif element == "tag_number"
168
- return array[1]
169
- elsif element == "element_type"
170
- return array[2]
171
- elsif element == "element_size"
172
- return array[3]
173
- elsif element == "number_of_elements"
174
- return array[4]
175
- elsif element == "data_size"
176
- return array[5]
177
- elsif element == "data_offset"
178
- return array[6]
179
- else
180
- return array[0]
120
+ #directory structure
121
+ #[name, tag number, element type, element size, number of elements, data size, data offset]
122
+ (0..(numElements-1)).each do |i|
123
+ directory[i] = []
124
+ #// name
125
+ name = ""
126
+ name << byteArray.getbyte(pos+=1).chr
127
+ name << byteArray.getbyte(pos+=1).chr
128
+ name << byteArray.getbyte(pos+=1).chr
129
+ name << byteArray.getbyte(pos+=1).chr
130
+ directory[i] << name
131
+ #// tag number
132
+ tag_number = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
133
+ directory[i] << tag_number
134
+ #// element type
135
+ element_type = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
136
+ directory[i] << element_type
137
+ #// element size
138
+ element_size = byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
139
+ directory[i] << element_size
140
+ #// number of elements
141
+ number_of_elements = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
142
+ directory[i] << number_of_elements
143
+ #// data size
144
+ data_size = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
145
+ directory[i] << data_size
146
+ #// data offset
147
+ data_offset = byteArray.getbyte(pos+=1)<<24 | byteArray.getbyte(pos+=1)<<16 | byteArray.getbyte(pos+=1)<<8 | byteArray.getbyte(pos+=1)
148
+ directory[i] << data_offset
149
+ #// we do not save the dataHandle field
150
+ pos += 4;
151
+ end
152
+ return directory
181
153
  end
182
- end
183
154
 
184
- #counts the number of samples and number of bases contained in this ABIF file
185
- #
186
- #== Parameters:
187
- #directory:: an array of array generated from readDirectoryEntry
188
- #numElements:: an int indicating the number of elements in this ABIF file
189
- #
190
- #== Returns:
191
- #number of samples and number of bases contained in this ABIF file
192
- def gatherInformation(directory, numElements)
193
- numSamples = 0
194
- numBases = 0
195
-
196
- (0..(numElements-1)).each do |i|
197
- if (get(directory[i],"name") == "DATA") && (get(directory[i], "tag_number") == 9)
198
- numSamples = get(directory[i], "number_of_elements") #number of elements
155
+
156
+ #directory structure
157
+ #[name, tag number, element type, element size, number of elements, data size, data offset]
158
+ #this is for easier index into the each directory array
159
+ #
160
+ #== Parameters:
161
+ #array:: an array with information from the directory
162
+ #element:: a string with type of information from the directory to retrieve: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset
163
+ #
164
+ #== Returns:
165
+ #the element from the array
166
+ def self.get(array, element)
167
+ if element == "name"
168
+ return array[0]
169
+ elsif element == "tag_number"
170
+ return array[1]
171
+ elsif element == "element_type"
172
+ return array[2]
173
+ elsif element == "element_size"
174
+ return array[3]
175
+ elsif element == "number_of_elements"
176
+ return array[4]
177
+ elsif element == "data_size"
178
+ return array[5]
179
+ elsif element == "data_offset"
180
+ return array[6]
199
181
  else
200
- if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
201
- numBases = get(directory[i], "number_of_elements") #number of elements
202
- end
182
+ return array[0]
203
183
  end
204
184
  end
205
-
206
- return numSamples, numBases
207
- end
208
185
 
209
- #extracts the trace information for the bases
210
- #
211
- #== Parameters:
212
- #filestream:: an open File
213
- #directory:: an array of array generated by readDirectoryEntry
214
- #numElements:: an int indicating the number of elements in this ABIF file
215
- #numSamples:: an int calculated by gatherInformation
216
- #
217
- #== Returns:
218
- #four arrays with trace data in the order ACGT
219
- def getSamples(filestream, directory, numElements, numSamples)
220
- samples_a = []
221
- samples_c = []
222
- samples_g = []
223
- samples_t = []
186
+ #counts the number of samples and number of bases contained in this ABIF file
187
+ #
188
+ #== Parameters:
189
+ #directory:: an array of array generated from readDirectoryEntry
190
+ #numElements:: an int indicating the number of elements in this ABIF file
191
+ #
192
+ #== Returns:
193
+ #number of samples and number of bases contained in this ABIF file
194
+ def self.gatherInformation(directory, numElements)
195
+ numSamples = 0
196
+ numBases = 0
224
197
 
225
- #// we guess the order being GATC, as Ferreira and Staden does
226
- (0..numElements-1).each do |i|
227
- tag_number = get(directory[i], "tag_number")
228
- if (get(directory[i],"name") == "DATA") && ([9,10,11,12].include? tag_number)
229
- byteArray_samples = ""
230
- filestream.seek(get(directory[i],"data_offset"), IO::SEEK_SET)
231
- filestream.read(get(directory[i], "number_of_elements")*2, byteArray_samples)
232
- pos = -1
233
- if tag_number == 9 #G
234
- (0..numSamples-1).each do |j|
235
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
236
- samples_g[j] = value
237
- end
238
- elsif tag_number == 10 #A
239
- (0..numSamples-1).each do |j|
240
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
241
- samples_a[j] = value
198
+ (0..(numElements-1)).each do |i|
199
+ if (get(directory[i],"name") == "DATA") && (get(directory[i], "tag_number") == 9)
200
+ numSamples = get(directory[i], "number_of_elements") #number of elements
201
+ else
202
+ if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
203
+ numBases = get(directory[i], "number_of_elements") #number of elements
242
204
  end
243
- elsif tag_number == 11 #T
244
- (0..numSamples-1).each do |j|
245
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
246
- samples_t[j] = value
247
- end
248
- else #C
249
- (0..numSamples-1).each do |j|
250
- value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
251
- samples_c[j] = value
205
+ end
206
+ end
207
+
208
+ return numSamples, numBases
209
+ end
210
+
211
+ #extracts the trace information for the bases
212
+ #
213
+ #== Parameters:
214
+ #filestream:: an open File
215
+ #directory:: an array of array generated by readDirectoryEntry
216
+ #numElements:: an int indicating the number of elements in this ABIF file
217
+ #numSamples:: an int calculated by gatherInformation
218
+ #
219
+ #== Returns:
220
+ #four arrays with trace data in the order ACGT
221
+ def self.getSamples(filestream, directory, numElements, numSamples)
222
+ samples_a = []
223
+ samples_c = []
224
+ samples_g = []
225
+ samples_t = []
226
+
227
+ #// we guess the order being GATC, as Ferreira and Staden does
228
+ (0..numElements-1).each do |i|
229
+ tag_number = get(directory[i], "tag_number")
230
+ if (get(directory[i],"name") == "DATA") && ([9,10,11,12].include? tag_number)
231
+ byteArray_samples = ""
232
+ filestream.seek(get(directory[i],"data_offset"), IO::SEEK_SET)
233
+ filestream.read(get(directory[i], "number_of_elements")*2, byteArray_samples)
234
+ pos = -1
235
+ if tag_number == 9 #G
236
+ (0..numSamples-1).each do |j|
237
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
238
+ samples_g[j] = value
239
+ end
240
+ elsif tag_number == 10 #A
241
+ (0..numSamples-1).each do |j|
242
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
243
+ samples_a[j] = value
244
+ end
245
+ elsif tag_number == 11 #T
246
+ (0..numSamples-1).each do |j|
247
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
248
+ samples_t[j] = value
249
+ end
250
+ else #C
251
+ (0..numSamples-1).each do |j|
252
+ value = byteArray_samples.getbyte(pos+=1) << 8 | byteArray_samples.getbyte(pos+=1)
253
+ samples_c[j] = value
254
+ end
252
255
  end
253
256
  end
254
257
  end
258
+ return samples_a, samples_c, samples_g, samples_t
255
259
  end
256
- return samples_a, samples_c, samples_g, samples_t
257
- end
258
260
 
259
- #extracts the called sequence information
260
- #
261
- #== Parameters:
262
- #filestream:: an open File
263
- #directory:: an array of array generated by readDirectoryEntry
264
- #numElements:: an int indicating the number of elements in this ABIF file
265
- #numBases:: an int calculated by gatherInformation
266
- #
267
- #== Returns:
268
- #an array with the called sequence
269
- def getCalledSequence(filestream, directory, numElements, numBases)
270
- calledSequence = []
271
- (0..numElements-1).each do |i|
272
- if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
273
- byteArray_seq = ""
274
- filestream.seek(get(directory[i], "data_offset"))
275
- filestream.read(numBases,byteArray_seq)
276
- (0..numBases-1).each do |j|
277
- calledSequence[j] = byteArray_seq.getbyte(j).chr
261
+ #extracts the called sequence information
262
+ #
263
+ #== Parameters:
264
+ #filestream:: an open File
265
+ #directory:: an array of array generated by readDirectoryEntry
266
+ #numElements:: an int indicating the number of elements in this ABIF file
267
+ #numBases:: an int calculated by gatherInformation
268
+ #
269
+ #== Returns:
270
+ #an array with the called sequence
271
+ def self.getCalledSequence(filestream, directory, numElements, numBases)
272
+ calledSequence = []
273
+ (0..numElements-1).each do |i|
274
+ if (get(directory[i], "name") == "PBAS") && (get(directory[i], "tag_number") == 2)
275
+ byteArray_seq = ""
276
+ filestream.seek(get(directory[i], "data_offset"))
277
+ filestream.read(numBases,byteArray_seq)
278
+ (0..numBases-1).each do |j|
279
+ calledSequence[j] = byteArray_seq.getbyte(j).chr
280
+ end
278
281
  end
279
282
  end
283
+ return calledSequence
280
284
  end
281
- return calledSequence
282
- end
283
285
 
284
- #extracts the trace information for the bases
285
- #
286
- #== Parameters:
287
- #filestream:: an open File
288
- #directory:: an array of array generated by readDirectoryEntry
289
- #numElements:: an int indicating the number of elements in this ABIF file
290
- #numBases:: an int calculated by gatherInformation
291
- #
292
- #== Returns:
293
- #an array with the indexes of the peaks
294
- def getPeakIndexes(filestream, directory, numElements, numBases)
295
- peakIndexes = []
296
- (0..numElements-1).each do |i|
297
- if (get(directory[i], "name") == "PLOC") && (get(directory[i], "tag_number") == 2)
298
- byteArray_peak = ""
299
- filestream.seek(get(directory[i], "data_offset"), IO::SEEK_SET)
300
- filestream.read(get(directory[i], "number_of_elements")*4, byteArray_peak)
301
- pos = -1
302
- (0..numBases-1).each do |j|
303
- peakIndex = byteArray_peak.getbyte(pos+=1) << 8 | byteArray_peak.getbyte(pos+=1)
304
- peakIndexes[j] = peakIndex
286
+ #extracts the trace information for the bases
287
+ #
288
+ #== Parameters:
289
+ #filestream:: an open File
290
+ #directory:: an array of array generated by readDirectoryEntry
291
+ #numElements:: an int indicating the number of elements in this ABIF file
292
+ #numBases:: an int calculated by gatherInformation
293
+ #
294
+ #== Returns:
295
+ #an array with the indexes of the peaks
296
+ def self.getPeakIndexes(filestream, directory, numElements, numBases)
297
+ peakIndexes = []
298
+ (0..numElements-1).each do |i|
299
+ if (get(directory[i], "name") == "PLOC") && (get(directory[i], "tag_number") == 2)
300
+ byteArray_peak = ""
301
+ filestream.seek(get(directory[i], "data_offset"), IO::SEEK_SET)
302
+ filestream.read(get(directory[i], "number_of_elements")*4, byteArray_peak)
303
+ pos = -1
304
+ (0..numBases-1).each do |j|
305
+ peakIndex = byteArray_peak.getbyte(pos+=1) << 8 | byteArray_peak.getbyte(pos+=1)
306
+ peakIndexes[j] = peakIndex
307
+ end
305
308
  end
306
309
  end
310
+ return peakIndexes
307
311
  end
308
- return peakIndexes
312
+
309
313
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: absee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2.3
4
+ version: 0.1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,10 +9,10 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-24 00:00:00.000000000 Z
12
+ date: 2012-11-14 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: reads ABIF sequencing / chromatogram files and extracts the peak indexes,
15
- called sequence, and ACGT values
14
+ description: .ab1 reader / ABIF reader; extracts the peak indexes, called sequence,
15
+ and ACGT values from sequencing files
16
16
  email: jencheng@ginkgobioworks.com
17
17
  executables: []
18
18
  extensions: []
@@ -43,6 +43,6 @@ rubyforge_project:
43
43
  rubygems_version: 1.8.23
44
44
  signing_key:
45
45
  specification_version: 3
46
- summary: reads .ab1 sequencing/chromatogram files
46
+ summary: .ab1 reader / ABIF reader
47
47
  test_files: []
48
48
  has_rdoc: