absee 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/absee.rb +89 -2
  2. metadata +1 -1
@@ -9,6 +9,15 @@
9
9
  #
10
10
  # MIT license 2012
11
11
 
12
+ #opens the ABIF sequencing / chromatogram file
13
+ #checks for ABIF file type
14
+ #major ABIF versions greater than 1 are not supported
15
+ #
16
+ #== Parameters:
17
+ #filename:: a string containing the filename (including the path and extensions)
18
+ #
19
+ #== Returns:
20
+ #Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
12
21
  def readAB(filename)
13
22
  #opens ab1 as a File object
14
23
  abFile = open(filename)
@@ -26,6 +35,14 @@ def readAB(filename)
26
35
  end
27
36
  end
28
37
 
38
+ #processes the opened ABIF filestream and calls subsequent methods to extract the data
39
+ #
40
+ #== Parameters:
41
+ #filestream:: an opened File
42
+ #
43
+ #== Returns:
44
+ #Six arrays: trace data for A, C, G, T, called sequence, and peak indexes
45
+ #readAB returns the results of this method
29
46
  def processAB(filestream)
30
47
  #// here, we can read the ABIF header information
31
48
  version = readUnsignedByte_2(4, filestream)
@@ -45,6 +62,14 @@ def processAB(filestream)
45
62
  return samples_a, samples_c, samples_g, samples_t, called_sequence, peakIndexes
46
63
  end
47
64
 
65
+ #reads 2 unsigned bytes and orders by most significant byte first
66
+ #
67
+ #== Parameters:
68
+ #offset:: how many bytes to offset for the read
69
+ #filestream:: an opened File
70
+ #
71
+ #== Returns:
72
+ #an int built from the 2 bytes read, most significant byte first
48
73
  def readUnsignedByte_2(offset, filestream)
49
74
  #// most significant byte first
50
75
  #// |byte0|byte1| <= |unsigned int|
@@ -54,6 +79,14 @@ def readUnsignedByte_2(offset, filestream)
54
79
  return (byteArray.getbyte(0) << 8) | byteArray.getbyte(1)
55
80
  end
56
81
 
82
+ #reads 4 unsigned bytes and orders by most significant byte first
83
+ #
84
+ #== Parameters:
85
+ #offset:: how many bytes to offset for the read
86
+ #filestream:: an opened File
87
+ #
88
+ #== Returns:
89
+ #an int built from the 4 bytes read, most significant byte first
57
90
  def readUnsignedByte_4(offset, filestream)
58
91
  byteArray = ""
59
92
  filestream.seek(offset, IO::SEEK_SET)
@@ -63,6 +96,16 @@ def readUnsignedByte_4(offset, filestream)
63
96
  return (byteArray.getbyte(0)<<24) | (byteArray.getbyte(1)<<16) | (byteArray.getbyte(2)<<8) | byteArray.getbyte(3)
64
97
  end
65
98
 
99
+ #reads the data from the directory
100
+ #
101
+ #== Parameters:
102
+ #dataOffset:: how many bytes to offset
103
+ #numElements:: number of elements in the file computed by gatherInformation
104
+ #filestream:: an opened File
105
+ #
106
+ #== Returns:
107
+ #an array of arrays, each with information from the directory
108
+ #[name, tag number, element type, element size, number of elements, data size, data offset]
66
109
  def readDirectoryEntry(filestream, dataOffset, numElements)
67
110
  filestream.seek(dataOffset, IO::SEEK_SET)
68
111
  byteArray = ""
@@ -109,7 +152,14 @@ end
109
152
 
110
153
  #directory structure
111
154
  #[name, tag number, element type, element size, number of elements, data size, data offset]
112
- #this is for easier access to the directory element
155
+ #this is for easier indexing into each directory array
156
+ #
157
+ #== Parameters:
158
+ #array:: an array with information from the directory
159
+ #element:: a string with the type of information from the directory to retrieve: [name, tag_number, element_type, element_size, number_of_elements, data_size, data_offset]
160
+ #
161
+ #== Returns:
162
+ #the element from the array
113
163
  def get(array, element)
114
164
  if element == "name"
115
165
  return array[0]
@@ -130,7 +180,14 @@ def get(array, element)
130
180
  end
131
181
  end
132
182
 
133
-
183
+ #counts the number of samples and number of bases contained in this ABIF file
184
+ #
185
+ #== Parameters:
186
+ #directory:: an array of arrays generated by readDirectoryEntry
187
+ #numElements:: an int indicating the number of elements in this ABIF file
188
+ #
189
+ #== Returns:
190
+ #number of samples and number of bases contained in this ABIF file
134
191
  def gatherInformation(directory, numElements)
135
192
  numSamples = 0
136
193
  numBases = 0
@@ -148,6 +205,16 @@ def gatherInformation(directory, numElements)
148
205
  return numSamples, numBases
149
206
  end
150
207
 
208
+ #extracts the trace information for the bases
209
+ #
210
+ #== Parameters:
211
+ #filestream:: an open File
212
+ #directory:: an array of arrays generated by readDirectoryEntry
213
+ #numElements:: an int indicating the number of elements in this ABIF file
214
+ #numSamples:: an int calculated by gatherInformation
215
+ #
216
+ #== Returns:
217
+ #four arrays with trace data in the order ACGT
151
218
  def getSamples(filestream, directory, numElements, numSamples)
152
219
  samples_a = []
153
220
  samples_c = []
@@ -188,6 +255,16 @@ def getSamples(filestream, directory, numElements, numSamples)
188
255
  return samples_a, samples_c, samples_g, samples_t
189
256
  end
190
257
 
258
+ #extracts the called sequence information
259
+ #
260
+ #== Parameters:
261
+ #filestream:: an open File
262
+ #directory:: an array of arrays generated by readDirectoryEntry
263
+ #numElements:: an int indicating the number of elements in this ABIF file
264
+ #numBases:: an int calculated by gatherInformation
265
+ #
266
+ #== Returns:
267
+ #an array with the called sequence
191
268
  def getCalledSequence(filestream, directory, numElements, numBases)
192
269
  calledSequence = []
193
270
  (0..numElements-1).each do |i|
@@ -203,6 +280,16 @@ def getCalledSequence(filestream, directory, numElements, numBases)
203
280
  return calledSequence
204
281
  end
205
282
 
283
+ #extracts the peak index information
284
+ #
285
+ #== Parameters:
286
+ #filestream:: an open File
287
+ #directory:: an array of arrays generated by readDirectoryEntry
288
+ #numElements:: an int indicating the number of elements in this ABIF file
289
+ #numBases:: an int calculated by gatherInformation
290
+ #
291
+ #== Returns:
292
+ #an array with the indexes of the peaks
206
293
  def getPeakIndexes(filestream, directory, numElements, numBases)
207
294
  peakIndexes = []
208
295
  (0..numElements-1).each do |i|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: absee
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: