dicom 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,197 +1,100 @@
1
+ # Copyright 2008-2009 Christoffer Lervåg
2
+
3
+ # Some notes about this DICOM file reading class:
4
+ # In addition to reading files that are compliant with DICOM 3 Part 10,
5
+ # the philosophy of this library is to have maximum compatibility,
6
+ # and thus it will read most 'DICOM' files that deviate from the standard.
7
+ # While reading files, this class will also analyse the hierarchy of elements
8
+ # for those DICOM files that feature sequences and items, enabling the user
9
+ # to take advantage of this information for advanced querying of the
10
+ # DICOM object afterwards.
11
+
1
12
  module DICOM
2
13
  # Class for reading the data from a DICOM file:
3
14
  class DRead
4
15
 
5
- attr_reader :success
16
+ attr_reader :success,:names,:labels,:types,:lengths,:values,:raw,:levels,:explicit,:file_endian,:msg
6
17
 
7
18
  # Initialize the DRead instance.
8
- def initialize(file_name=nil, lib=nil)
9
- @a=0
10
- @b=0
11
- # Variables that hold data that will be returned to the person/procedure using this class:
12
- # Arrays that will hold information from the DICOM file:
13
- @names = Array.new()
14
- @labels = Array.new()
15
- @types = Array.new()
16
- @lengths = Array.new()
17
- @values = Array.new()
18
- @raw = Array.new()
19
- @levels = Array.new()
20
- # Keeping track of how many bytes have been read from the file up to and including each tag:
21
- # This is necessary for tracking the hiearchy in some DICOM files.
22
- @integrated_lengths = Array.new()
23
- @header_length = 0
24
- # Keep track of the hierarchy of tags (this will be used to determine when a sequence or item is finished):
25
- @hierarchy = Array.new()
26
- @hierarchy_error = false
27
- # Array that will holde any messages generated while reading the DICOM file:
28
- @msg = Array.new()
29
- # Explicitness (explicit (true) by default):
30
- @explicit = true
31
- # Explicitness of the remaining groups after the first group:
32
- @rest_explicit = true
33
- # Variable to keep track of whether the current sequence being read have length specified or not:
34
- @sq_length = false
35
- # Variable to keep track of whether the image pixel data in this file are compressed or not, and if it exists at all:
36
- @compression = false
37
- # Pixel data is color or greyscale?
38
- @color = false
39
- # Default endianness of start of DICOM files is little endian:
40
- @file_endian=false
19
+ def initialize(file_name=nil, opts={})
20
+ # Process option values, setting defaults for the ones that are not specified:
21
+ @lib = opts[:lib] || DLibrary.new
22
+ @sys_endian = opts[:sys_endian] || false
41
23
 
42
- # Variables used internally when reading the dicom file:
43
- # If tag does not exist in the library it is unknown:
44
- @unknown = false
45
- # Check endianness of the system (false if little endian):
46
- @sys_endian=check_sys_endian()
47
- # Endianness of the remaining groups after the first group:
48
- @rest_endian=false
49
- # Use a "relationship endian" variable to guide reading of file (true if they are equal):
50
- if @sys_endian == @file_endian
51
- @endian = true
52
- else
53
- @endian = false
54
- end
55
- # A length variable will be used at the end to check whether the last tag was read correctly, or whether the file endend unexpectedly:
56
- @data_length = 0
57
- # Variable used to tell whether file was read succesfully or not:
58
- @success = false
59
- # Keeping track of the tag level while reading through the file:
60
- @current_level = 0
61
-
62
- # Open file for binary reading:
63
- begin
64
- @file = File.new(file_name, "rb")
65
- rescue
66
- @msg += ["Error! Could not open file: "+ file_name + " Returning."]
67
- return
68
- end
69
-
70
- # Load the library class (DICOM dictionary):
71
- if lib != nil
72
- # Library already specified by user:
73
- @lib = lib
74
- else
75
- @lib = DLibrary.new()
76
- end
24
+ # Initiate the variables that are used during file reading:
25
+ init_variables()
26
+
27
+ # Test if file is readable and open it to the @file variable:
28
+ open_file(file_name)
77
29
 
78
30
  # Read the initial header of the file:
79
- header=check_header()
80
- if header == false
81
- @file.close()
82
- @file = File.new(file_name, "rb")
83
- @header_length = 0
84
- elsif header == nil
85
- # Reading the file did not succeed, and we need to abort.
86
- @msg += ["Error! Could not read: "+ file_name + " It might be a directory. Returning."]
31
+ if @file == nil
32
+ # File is not readable, so we return:
87
33
  return
34
+ else
35
+ # Read and verify the DICOM header:
36
+ header = check_header()
37
+ # If there was no header, we will attempt to read tags from the very start of the file:
38
+ if header == false
39
+ @file.close()
40
+ @file = File.new(file_name, "rb")
41
+ @header_length = 0
42
+ end
88
43
  end
89
-
90
- # Initiate the process to read tags:
44
+
45
+ # Run a loop to read the tags:
46
+ # (Tag information is stored in arrays by the method process_tag)
91
47
  tag = true
92
- temp_check=true
93
- while tag != false and temp_check== true do
94
- tag=process_tag()
95
- # Store the tag information in arrays:
96
- if tag != false
97
- @names+=[tag[0]]
98
- @labels+=[tag[1]]
99
- @types+=[tag[2]]
100
- @lengths+=[tag[3]]
101
- @values+=[tag[4]]
102
- @raw+=[tag[5]]
103
- end
48
+ while tag != false do
49
+ tag = process_tag()
104
50
  end
105
- @success = true
106
- # Check the status of the pixel data:
107
- check_pixel_status()
108
- # Index of last element in tag arrays:
109
- @lastIndex=@names.length-1
51
+
52
+ # Post processing:
110
53
  # Close the file as we are finished reading it:
111
54
  @file.close()
112
- # Check if the last tag was read out correctly (that the length of its data corresponds to that expected by the length specified in the DICOM file):
113
- if @data_length != @lengths[@lastIndex]
114
- @msg += ["Error! The data content read from file does not match the length specified for the tag "+ @labels[@lastIndex] + ". It seems this is not a valid DICOM file. Returning."]
115
- @success = false
116
- return
55
+ # Assume file has been read successfully:
56
+ @success = true
57
+ # Check if the last tag was read out correctly (that the length of its data (@raw.last.length)
58
+ # corresponds to that expected by the length specified in the DICOM file (@lengths.last)).
59
+ # We only run this test if the last tag has a positive expectation value, obviously.
60
+ if @lengths.last.to_i > 0
61
+ if @raw.last.length != @lengths.last
62
+ @msg += ["Error! The data content read from file does not match the length specified for the tag #{@labels.last}. It seems this is either an invalid or corrupt DICOM file. Returning."]
63
+ @success = false
64
+ return
65
+ end
117
66
  end
118
- end
67
+ end # of method initialize
119
68
 
120
69
 
121
- # Returns the relevant information gathered from the read dicom procedure.
122
- def return_data()
123
- return [@names,@labels,@types,@lengths,@values,@raw,@levels,@compression,@color,@explicit, @file_endian, @msg]
124
- end
70
+ # Following methods are private:
71
+ private
125
72
 
126
73
 
127
74
  # Checks the initial header of the DICOM file.
128
75
  def check_header()
129
76
  # According to the official DICOM standard, a DICOM file shall contain 128
130
- # consequtive zero bytes followed by 4 bytes that spell the string 'DICM'.
77
+ # consecutive (zero) bytes followed by 4 bytes that spell the string 'DICM'.
131
78
  # Apparently, some providers seem to skip this in their DICOM files.
132
- # First 128 bytes should be zeroes:
133
- begin
134
- bin1=@file.read(128)
135
- @header_length += 128
136
- rescue
137
- # The file could not be read. Most likely because the file name variable supplied to this instance was in fact a directory.
138
- return nil
139
- end
140
- str_header1=bin1.unpack('a' * 128).to_s
79
+ bin1 = @file.read(128)
80
+ @header_length += 128
81
+ #filler = bin1.unpack('a' * 128).to_s
141
82
  # Next 4 bytes should spell 'DICM':
142
- bin2=@file.read(4)
83
+ bin2 = @file.read(4)
143
84
  @header_length += 4
144
- str_header2=bin2.unpack('a' * 4).to_s
145
- # If we dont have this expected header, we will still try to read it is a DICOM file.
146
- if str_header2 != 'DICM' then
147
- @msg+=["Warning: The specified file does not contain the official DICOM header."]
148
- @msg+=["Will try to read the file anyway, as some sources are known to skip the formal DICOM header."]
149
- # Some DICOM files skips group 2, which defines the structure of the DICOM file.
150
- # This has only been observed in files that also skips the above part of the DICOM header.
151
- # Check for skipped group 0002:
152
- group_label=bin1.unpack('h4').to_s.reverse.upcase
153
- if (group_label.include? "2")
154
- #Assume the file starts with a group 0002 tag, as "normal".
155
- # Assume a default transfer syntax: Implicit, Little Endian.
156
- @explicit = false
157
- @rest_explicit = false
158
- @file_endian = false
159
- @rest_endian = false
160
- @compression = false
161
- else
162
- # Assume a default transfer syntax: Implicit, Little Endian.
163
- # (Turns out I use the same settings as above, which makes this somewhat silly, but I'll leave it like this for now in case of any changes later)
164
- @explicit = false
165
- @rest_explicit = false
166
- @file_endian = false
167
- @rest_endian = false
168
- @compression = false
169
- @msg+=["Warning: Group '0002' Transfer Syntax does not exist. Assuming Implicit VR, Little Endian."]
170
- end
85
+ dicm = bin2.unpack('a' * 4).to_s
86
+ if dicm != 'DICM' then
87
+ # Header is not valid (we will still try to read it as a DICOM file though):
88
+ @msg += ["Warning: The specified file does not contain the official DICOM header. Will try to read the file anyway, as some sources are known to skip this header."]
89
+ # As the file is not conforming to the DICOM standard, it is possible that it does not contain a
90
+ # transfer syntax tag, and as such, we attempt to choose the most probable encoding values here:
91
+ @explicit = false
171
92
  return false
172
93
  else
94
+ # Header is valid:
173
95
  return true
174
96
  end
175
- end
176
-
177
-
178
- # Checks the status of the pixel data that has been read from the DICOM file: whether it exists at all and if its greyscale or color.
179
- # Modifies instance variable @color if color image is detected and instance variable @compression if no pixel data is detected.
180
- def check_pixel_status()
181
- # Check if pixel data is present:
182
- pixel_pos = @labels.index("7FE0,0010")
183
- if pixel_pos == nil
184
- @compression = nil
185
- return
186
- end
187
- # Check for color image:
188
- col_string = get_value("0028,0004")
189
- if col_string != false
190
- if (col_string.include? "RGB") or (col_string.include? "COLOR") or (col_string.include? "COLOUR")
191
- @color = true
192
- end
193
- end
194
- end
97
+ end # of method check_header
195
98
 
196
99
 
197
100
  # Governs the process of reading tags in the DICOM file.
@@ -199,106 +102,73 @@ module DICOM
199
102
  #going on here in all cases. Perhaps some day I will get the courage to have a go at it again.)
200
103
  def process_tag()
201
104
  #STEP 1: ------------------------------------------------------
202
- # Read the tag label, but do not continue if the method signals that we have reached end of file:
203
- label=read_label()
105
+ # Attempt to read tag label, but abort if we have reached end of file:
106
+ label = read_label()
204
107
  if label == false
108
+ # End of file, no more tags.
205
109
  return false
206
- end
207
- # Retrieve the tag name and type based on the label we have read from file:
110
+ end
111
+ # STEP 2: ------------------------------------------------------
112
+ # Access library to retrieve the tag name and VR from the label we have read:
208
113
  lib_data = @lib.get_name_vr(label)
209
114
  name = lib_data[0]
210
115
  vr = lib_data[1]
211
- if vr == "UN"
212
- @unknown = true
213
- else
214
- @unknown = false
215
- end
216
- # STEP 2: ----------------------------------------------------
217
- # Continue reading the tag information: Byte type and length.
116
+ # (Note: VR will be overwritten if the DICOM file contains VR)
117
+
118
+ # STEP 3: ----------------------------------------------------
119
+ # Read tag VR (if it exists) and the length value:
218
120
  tag_info = read_type_length(vr,label)
219
121
  type = tag_info[0]
122
+ level_type = type
220
123
  length = tag_info[1]
221
- # For sequence type tag, check if the tag have length specified:
222
- if type == "SQ"
223
- if length == "UNDEFINED" or length.to_i == 0
224
- @sq_length = false
225
- else
226
- @sq_length = true
227
- end
228
- end
229
- # If length is undefined, do not continue to read tag data:
230
- if length == "UNDEFINED"
231
- if label == "7FE0,0010"
232
- data = "(Encapsulated pixel data)"
233
- name = "Encapsulated image(s)"
234
- type = "SQ"
235
- elsif type == "SQ" or type == "()"
236
- # Do not change name of tag.
237
- data = "(Encapsulated tags)"
238
- else
239
- data = "(Encapsulated data)"
240
- name = "Encapsulated information"
241
- end
242
- # Set hiearchy level:
243
- set_level(type, length, label)
244
- return [name,label,type,length,data]
245
- end
246
- # Add the length of the content of the tag to the last element in the integrated_lengths array:
247
- # (but not if it is a sequence or item, as in this case the length of the tag is its sub-tags)
248
- if length.to_i != 0 and type != "SQ" and type != "()"
249
- @integrated_lengths[@integrated_lengths.size-1] += length
250
- end
251
- # Set hiearchy level:
252
- set_level(type, length, label)
253
- # Some special handling for item related tags, which may result in returning without reading data:
254
- if type == "()"
255
- # If length is zero, just return:
256
- if length == 0
257
- type = ""
258
- data = nil
259
- @data_length = 0
260
- return [name,label,type,length,data]
261
- else
262
- # If there is content, this may, in the case of an image, be the image data.
263
- # Must insert the image's type here.
264
- # Some times when this tag has a length, it does not have content in itself, but instead
265
- # have content in a number of subtags.
266
- if @sq_length != true
267
- # Treat the item as containing image data:
268
- type = "OW" # A more general approach should be implemented here.
269
- # For this special case, where item contains the data itself, instead of in sub-tags,
270
- # we declare that there is to be no sub-level after all.
271
- # This handling is not particularly obvious or elegant, and perhaps in the future I will
272
- # be able to rewrite this whole process_tag method to something more sane.
273
- @current_level = @current_level - 1
274
- end
275
- end
276
- end
277
- # STEP 3: ----------------------------------------
278
- # Finally read the tag data.
279
- tag_data = read_data(type,length)
280
- value = tag_data[0]
281
- raw = tag_data[1]
282
- # Check for the Transfer Syntax UID tag, and process it:
283
- if label == "0002,0010"
284
- process_syntax(value)
285
- end
286
- if type == "SQ" or type == "()"
287
- @data_length = length # To avoid false errors. In time perhaps a better way of handling this will be found.
124
+
125
+ # STEP 4: ----------------------------------------
126
+ # Reading value of tag.
127
+ # Special handling needed for items in encapsulated image data:
128
+ if @enc_image and label == "FFFE,E000"
129
+ # The first item appearing after the image tag is a 'normal' item, the rest hold image data.
130
+ # Note that the first item will contain data if there are multiple images, and so must be read.
131
+ type = "OW" # how about alternatives like OB?
132
+ # Modify name of item if this is an item that holds pixel data:
133
+ if @labels.last != "7FE0,0010"
134
+ name = "Pixel Data Item"
135
+ end
136
+ end
137
+ # Read the value of the tag (if it contains data, and it is not a sequence or ordinary item):
138
+ if length.to_i > 0 and type != "SQ" and type != "()"
139
+ # Read the tag data:
140
+ tag_data = read_data(type,length)
141
+ value = tag_data[0]
142
+ raw = tag_data[1]
288
143
  else
289
- @data_length = raw.length
290
- end
291
- return [name,label,type,length,value,raw]
292
- end
293
- # END READ TAG
144
+ # No tag data.
145
+ # Special case: Check if pixel data tag is sequenced:
146
+ if label == "7FE0,0010"
147
+ # Change name and type of pixel data tag if it does not contain data itself:
148
+ name = "Encapsulated Pixel Data"
149
+ level_type = "SQ"
150
+ @enc_image = true
151
+ end
152
+ end # of if length.to_i > 0
153
+ # Set the hiearchy level of this tag:
154
+ set_level(level_type, length, label, name)
155
+ # Transfer the gathered data to arrays and return true:
156
+ @names += [name]
157
+ @labels += [label]
158
+ @types += [type]
159
+ @lengths += [length]
160
+ @values += [value]
161
+ @raw += [raw]
162
+ return true
163
+ end # of method process_tag
294
164
 
295
165
 
296
166
  # Reads and returns TAG LABEL (4 first bytes of tag).
297
167
  def read_label()
298
- bin1=@file.read(2)
299
- bin2=@file.read(2)
300
- # Check if we have reached end of file before proceeding:
301
- if bin1 == nil or bin2 == nil
168
+ bin1 = @file.read(2)
169
+ bin2 = @file.read(2)
170
+ # Do not proceed if we have reached end of file:
171
+ if bin2 == nil
302
172
  return false
303
173
  end
304
174
  # Add the length of the tag label. If this was the first label read from file, we need to add the header length too:
@@ -310,236 +180,195 @@ module DICOM
310
180
  @integrated_lengths += [@integrated_lengths[@integrated_lengths.length-1] + 4]
311
181
  end
312
182
  # Unpack the blobs:
313
- label1=bin1.unpack('h*').to_s.reverse.upcase
314
- label2=bin2.unpack('h*').to_s.reverse.upcase
315
- # Special treatment of tags that are of the first "0002" group:
183
+ label1 = bin1.unpack('h*').to_s.reverse.upcase
184
+ label2 = bin2.unpack('h*').to_s.reverse.upcase
185
+ # Whether DICOM file is big or little endian, the first 0002 group is always little endian encoded.
186
+ # In case of big endian system:
316
187
  if @sys_endian
317
- # Rearrange the numbers:
188
+ # Rearrange the numbers (# This has never been tested btw.):
318
189
  label1 = label1[2..3]+label1[0..1]
319
190
  label2 = label2[2..3]+label2[0..1]
320
- # Has this been verified? Suspect unintended consequence.
321
191
  end
322
- # Process the label, by considering the endian-ness relationship, if are past the initial "0002" group:
323
- if label1 != "0002"
324
- # As we are past the initial little endian part of the file, update the file properties:
325
- @file_endian = @rest_endian
326
- @explicit = @rest_explicit
327
- #Update the endian-relationship variable:
328
- if @sys_endian == @file_endian
329
- @endian = true
330
- else
331
- @endian = false
332
- end
333
- # Do we need to rearrange?
334
- if @endian
335
- # No action needed
336
- else
337
- # Need to rearrange the first and second part of each string:
338
- label1 = label1[2..3]+label1[0..1]
339
- label2 = label2[2..3]+label2[0..1]
340
- end
192
+ # When we shift from group 0002 to another group we need to update our endian/explicitness variables:
193
+ if label1 != "0002" and @switched == false
194
+ switch_syntax()
341
195
  end
342
- # Join the label group and label element together to the final string:
343
- label=label1+","+label2
344
- # Return the label:
345
- return label
346
- end
347
- # END TAG LABEL
196
+ # Perhaps we need to rearrange the labels?:
197
+ if not @endian
198
+ # Need to rearrange the first and second part of each string:
199
+ label1 = label1[2..3]+label1[0..1]
200
+ label2 = label2[2..3]+label2[0..1]
201
+ end
202
+ # Join the label group and label element together to the final string and return:
203
+ return label1+","+label2
204
+ end # of method read_label
348
205
 
349
206
 
350
207
  # Reads and returns TAG TYPE (2 bytes) and TAG LENGTH (Varying length).
351
208
  def read_type_length(type,label)
352
- # Structure will differ, dependent on whether we have explicit or implicit type of file:
353
- # EXPLICIT:
209
+ # Structure will differ, dependent on whether we have explicit or implicit encoding:
210
+ # *****EXPLICIT*****:
354
211
  if @explicit == true
355
- # It seems we need to have a special case for item labels in the explicit scenario:
356
- if label == "FFFE,E000" or label == "FFFE,E00D" or label == "FFFE,E0DD"
357
- bin=@file.read(4)
358
- @integrated_lengths[@integrated_lengths.length-1] += 4
359
- length = get_SL(bin)
360
- else
212
+ # Step 1: Read VR (if it exists)
213
+ unless label == "FFFE,E000" or label == "FFFE,E00D" or label == "FFFE,E0DD"
361
214
  # Read tag type field (2 bytes - since we are not dealing with an item related tag):
362
- bin=@file.read(2)
215
+ bin = @file.read(2)
363
216
  @integrated_lengths[@integrated_lengths.length-1] += 2
364
- type=bin.unpack('a*').to_s
217
+ type = bin.unpack('a*').to_s
365
218
  end
366
- # Two (three) possible structures for value length here, dependent on tag type:
219
+ # Step 2: Read length
220
+ # Three possible structures for value length here, dependent on tag type:
367
221
  case type
368
- when "OB","OW","SQ","UN"
369
- # Two empty bytes should occur here, according to the standard:
370
- bin=@file.read(2)
371
- @integrated_lengths[@integrated_lengths.length-1] += 2
372
- # Read value length (4 bytes):
373
- bin=@file.read(4)
374
- @integrated_lengths[@integrated_lengths.length-1] += 4
375
- length=get_SL(bin)
376
- when "()"
377
- #An empty entry for the item related tags (As it has already been processed).
378
- else
379
- # For all the other tag types: Read value length (2 bytes):
380
- bin=@file.read(2)
381
- @integrated_lengths[@integrated_lengths.length-1] += 2
382
- length=get_US(bin)
222
+ when "OB","OW","SQ","UN"
223
+ # 6 bytes total:
224
+ # Two empty first:
225
+ bin = @file.read(2)
226
+ @integrated_lengths[@integrated_lengths.length-1] += 2
227
+ # Value length (4 bytes):
228
+ bin = @file.read(4)
229
+ @integrated_lengths[@integrated_lengths.length-1] += 4
230
+ length = bin.unpack(@ul)[0]
231
+ when "()"
232
+ # 4 bytes:
233
+ # For labels "FFFE,E000", "FFFE,E00D" and "FFFE,E0DD"
234
+ bin = @file.read(4)
235
+ @integrated_lengths[@integrated_lengths.length-1] += 4
236
+ length = bin.unpack(@ul)[0]
237
+ else
238
+ # 2 bytes:
239
+ # For all the other tag types, value length is 2 bytes:
240
+ bin = @file.read(2)
241
+ @integrated_lengths[@integrated_lengths.length-1] += 2
242
+ length = bin.unpack(@us)[0]
383
243
  end
384
244
  else
385
- #IMPLICIT:
386
- # Read value length (4 bytes):
387
- bin=@file.read(4)
245
+ # *****IMPLICIT*****:
246
+ # No VR (retrieved from library based on the tag's label)
247
+ # Reading value length (4 bytes):
248
+ bin = @file.read(4)
388
249
  @integrated_lengths[@integrated_lengths.length-1] += 4
389
- length = get_SL(bin)
250
+ length = bin.unpack(@ul)[0]
390
251
  end
391
252
  # For encapsulated data, the tag length will not be defined. To convey this,
392
- # the hex sequence 'ff ff ff ff' is used (-1 converted to signed long).
393
- if length == -1
394
- length = "UNDEFINED"
253
+ # the hex sequence 'ff ff ff ff' is used (which reads as -1 as a signed long, or 4294967295 as an unsigned long).
254
+ if length == 4294967295
255
+ length = @undef
395
256
  elsif length%2 >0
396
257
  # According to the DICOM standard, all tag lengths should be an even number.
397
258
  # If it is not, it may indicate a file that is not standards compliant or it might even not be a DICOM file.
398
- @msg+=["Warning: Odd number of bytes in tag length occured. This is a violation of the DICOM standard, but program will attempt to continue reading the rest of the file."]
259
+ @msg += ["Warning: Odd number of bytes in tag length occured. This is a violation of the DICOM standard, but program will still attempt to read the rest of the file."]
399
260
  end
400
- return [type,length]
401
- end
402
- # END BYTE TYPE and TAG LENGTH
261
+ return [type, length]
262
+ end # of method read_type_length
403
263
 
404
264
 
405
- # Reads and returns TAG DATA (Varying length - determined at an earlier stage).
265
+ # Reads and returns TAG DATA (Of varying length - which is determined at an earlier stage).
406
266
  def read_data(type, length)
407
- # Treatment dependent on what type of information we are dealing with.
267
+ # Read the data:
268
+ bin = @file.read(length)
269
+ @integrated_lengths[@integrated_lengths.size-1] += length
270
+ # Decoding of content will naturally depend on what kind of content (VR) we have.
408
271
  case type
409
272
 
410
- # Normally these numbers tags will contain just one number, but in some cases,
411
- # they contain multiple numbers. In such cases we will read each number and store
412
- # them all in a string separated by "/".
413
- # Unsigned long: (4 bytes)
414
- when "UL"
415
- bin = @file.read(length)
416
- if length <= 4
417
- data = get_UL(bin)
418
- else
419
- data = process_numbers(length, type, bin)
420
- end
421
-
422
- # Signed long: (4 bytes)
423
- when "SL"
424
- bin = @file.read(length)
425
- if length <= 4
426
- data = get_SL(bin)
427
- else
428
- data = process_numbers(length, type, bin)
429
- end
273
+ # Normally the "number tags" will contain just one number, but in some cases,
274
+ # they contain multiple numbers. In such cases we will read each number and store
275
+ # them all in a string separated by "/".
276
+ # Unsigned long: (4 bytes)
277
+ when "UL"
278
+ if length <= 4
279
+ data = bin.unpack(@ul)[0]
280
+ else
281
+ data = bin.unpack(@ul).join("/")
282
+ end
430
283
 
431
- # Unsigned short: (2 bytes)
432
- when "US"
433
- bin = @file.read(length)
434
- if length <= 2
435
- data = get_US(bin)
436
- else
437
- data = process_numbers(length, type, bin)
438
- end
284
+ # Signed long: (4 bytes)
285
+ when "SL"
286
+ if length <= 4
287
+ data = bin.unpack(@sl)[0]
288
+ else
289
+ data = bin.unpack(@sl).join("/")
290
+ end
439
291
 
440
- # Signed short: (2 bytes)
441
- when "SS"
442
- bin = @file.read(length)
443
- if length <= 2
444
- data = get_SS(bin)
445
- else
446
- data = process_numbers(length, type, bin)
447
- end
292
+ # Unsigned short: (2 bytes)
293
+ when "US"
294
+ if length <= 2
295
+ data = bin.unpack(@us)[0]
296
+ else
297
+ data = bin.unpack(@us).join("/")
298
+ end
448
299
 
449
- # Floating point double: (8 bytes)
450
- when "FD"
451
- bin = @file.read(length)
452
- if length <= 8
453
- data = get_FD(bin)
454
- else
455
- data = process_numbers(length, type, bin)
456
- end
300
+ # Signed short: (2 bytes)
301
+ when "SS"
302
+ if length <= 2
303
+ data = bin.unpack(@ss)[0]
304
+ else
305
+ data = bin.unpack(@ss).join("/")
306
+ end
457
307
 
458
- # Unknown information, header element is not recognised from local database:
459
- when "UN"
460
- bin=@file.read(length)
461
- data=bin.unpack('H*')[0]
462
-
463
- # A tag that contains items/elements (sequence of elements):
464
- when "SQ"
465
- # The tag has no content in itself, the file starts directly on a new tag adress.
466
- data="(Sequence of Elements)"
467
-
468
- # Item tag:
469
- when "()"
470
- # Tag may have a length, but no content belonging to this tag itself. They are to be read
471
- # for this item's subtags.
472
- data = "(Sequence of Tags)"
473
-
474
- # The tag contains a tag adress (4 bytes):
475
- when "AT"
476
- if length != 4
477
- @msg+=["Warning: Unexpected tag length, expected 4 bytes for tag type 'AT'!"]
478
- end
479
- temp=Array.new(4)
480
- 4.times do |i|
481
- bin=@file.read(1)
482
- temp[i]=bin.unpack('H*')[0]
483
- end
484
- # Put together, mix up the order to get it correct:
485
- data=temp[1].to_s+temp[0].to_s+"."+temp[3].to_s+temp[2].to_s
486
- # This has not been tested with other than Little endian system/file:
487
- if @file_endian or @system_endian
488
- @msg+=["Warning: Handling for tag type 'AT' has not been verified for other than default endianness."]
489
- end
308
+ # Floating point single: (4 bytes)
309
+ when "FL"
310
+ if length <= 4
311
+ data = bin.unpack(@fs)[0]
312
+ else
313
+ data = bin.unpack(@fs).join("/")
314
+ end
490
315
 
491
- # Binary data, used sometimes when we have encapsulated images:
492
- when "OB"
493
- bin=@file.read(length)
494
- data=bin.unpack('H*')[0]
495
-
496
- # Image data:
497
- when "OW"
498
- # We need to know what kind of bith depth the pixel data is saved with:
499
- bit_depth=get_value('0028,0100')
500
- # Proceed to read the image binary data:
501
- bin=@file.read(length)
502
- # Number of bytes used per pixel will determine how to unpack this:
503
- case bit_depth
504
- when 8
505
- data=get_BYTE(bin) # Byte/Character/Fixnum (1 byte)
506
- when 16
507
- data=get_US(bin) # Unsigned short (2 bytes)
508
- when 12
509
- # 12 BIT SIMPLY NOT WORKING YET!
510
- # This one is a bit more tricky to extract.
511
- # I havent really given this priority so far as 12 bit image data is rather rare.
512
- @msg+=["Warning: Bit depth 12 is not working correctly at this time!"]
513
- data=Array.new(length)
514
- (length).times do |i|
515
- hex=bin.unpack('H3')
516
- hex4="0"+hex[0]
517
- num=hex[0].unpack('v')
518
- data[i]=num
316
+ # Floating point double: (8 bytes)
317
+ when "FD"
318
+ if length <= 8
319
+ data = bin.unpack(@fd)[0]
320
+ else
321
+ data = bin.unpack(@fd).join("/")
519
322
  end
520
- else
521
- raise "Bit depth "+bit_depth.to_s+" has not received implementation in this procedure yet."
522
- end
523
323
 
524
- # For everything else, assume string type information:
525
- when 'AE','AS','CS','DA','DS','DT','IS','LO','LT','PN','SH','ST','TM','UI','UT' #,'VR'
526
- bin=@file.read(length)
527
- data=bin.unpack('a*').to_s
528
- else
529
- @msg+=["Warning: Tag type "+type+" does not have a reading method assigned to it. Please update the source code."]
530
- bin=@file.read(length)
531
- data=bin.unpack('H*')[0]
532
- end
324
+ # The tag contains a tag label (4 bytes):
325
+ when "AT"
326
+ # Bytes read in following order: 1 0 , 3 2 (And Hex nibbles read in this order: Hh)
327
+ # NB! This probably needs to be modified when dealing with something other than little endian.
328
+ # Tag label is unpacked to a string in the format GGGGEEEE.
329
+ data = (bin.unpack("xHXhX2HXh").join + bin.unpack("x3HXhX2HXh").join).upcase
330
+ #data = (bin.unpack("xHXhX2HXh").join + "," + bin.unpack("x3HXhX2HXh").join).upcase
331
+
332
+ # We have a number of VRs that are decoded as string:
333
+ when 'AE','AS','CS','DA','DS','DT','IS','LO','LT','PN','SH','ST','TM','UI','UT' #,'VR'
334
+ data = bin.unpack('a*').to_s
335
+
336
+ # NB!
337
+ # FOLLOWING TAG TYPES WILL NOT BE DECODED.
338
+ # DECODING OF PIXEL DATA IS MOVED TO DOBJECT FOR PERFORMANCE REASONS.
339
+
340
+ # Unknown information, header element is not recognised from local database:
341
+ when "UN"
342
+ #data=bin.unpack('H*')[0]
343
+
344
+ # Other byte string, 1-byte integers
345
+ when "OB"
346
+ #data = bin.unpack('H*')[0]
347
+
348
+ # Other float string, 4-byte floating point numbers
349
+ when "OF"
350
+ # NB! This tag type has not been tested yet with an actual DICOM file.
351
+ #data = bin.unpack(@fs)
352
+
353
+ # Image data:
354
+ # Other word string, 2-byte integers
355
+ when "OW"
356
+ # empty
357
+
358
+ # Unknown VR:
359
+ else
360
+ @msg += ["Warning: Tag type #{type} does not have a reading method assigned to it. Please contact the author."]
361
+ #data = bin.unpack('H*')[0]
362
+ end # of case type
533
363
 
534
364
  # Return the data:
535
- return [data,bin]
536
- end
537
- # END TAG DATA
365
+ return [data, bin]
366
+ end # of method read_data
538
367
 
539
368
 
540
369
  # Sets the level of the current tag in the hierarchy.
541
370
  # The default (top) level is zero.
542
- def set_level(type, length, label)
371
+ def set_level(type, length, label, name)
543
372
  # Set the level of this tag:
544
373
  @levels += [@current_level]
545
374
  # Determine if there is a level change for the following tag:
@@ -550,7 +379,7 @@ module DICOM
550
379
  # not in its sub-tags, we should not increase the level. (This is fixed in the process_tag method.)
551
380
  if type == "SQ"
552
381
  increase = true
553
- elsif label =="FFFE,E000"
382
+ elsif name == "Item"
554
383
  increase = true
555
384
  else
556
385
  increase = false
@@ -574,9 +403,11 @@ module DICOM
574
403
  # If it is an array (of length and position), then we need to check the integrated_lengths array
575
404
  # to see if the current sub-level has expired.
576
405
  if @hierarchy.size > 0
577
- check_level_end()
406
+ # Do not perform this check for Pixel Data Items or Sequence Delimitation Items:
407
+ # (If performed, it will give false errors for the case when we have Encapsulated Pixel Data)
408
+ check_level_end() unless name == "Pixel Data Item" or label == "FFFE,E0DD"
578
409
  end
579
- end
410
+ end # of method set_level
580
411
 
581
412
 
582
413
  # Checks how far we've read in the DICOM file to determine if we have reached a point
@@ -607,266 +438,166 @@ module DICOM
607
438
  end
608
439
  end
609
440
  end
610
- end
441
+ end # of method check_level_end
611
442
 
612
443
 
613
- # Returns the (processed) value of a DICOM tag based on an input tag label, category name or array index.
614
- def get_value(id)
615
- # Assume we have been fed a tag label:
616
- pos=@labels.index(id)
617
- # If this does not give a hit, assume we have been fed a tag name:
618
- if pos==nil
619
- pos=@names.index(id)
620
- end
621
- # If we still dont have a hit, check if it is a valid number within the array range:
622
- if pos == nil
623
- if (id.is_a? Integer)
624
- if id >= 0 and id <= @lastIndex
625
- # The id supplied is a valid position, return its corresponding value:
626
- return @values[id]
444
+ # Tests if the file is readable and opens it.
445
+ def open_file(file)
446
+ if File.exist?(file)
447
+ if File.readable?(file)
448
+ if not File.directory?(file)
449
+ if File.size(file) > 8
450
+ @file = File.new(file, "rb")
451
+ else
452
+ @msg += ["Error! File is too small to contain DICOM information. Returning. (#{file})"]
453
+ end
627
454
  else
628
- return false
455
+ @msg += ["Error! File is a directory. Returning. (#{file})"]
629
456
  end
630
457
  else
631
- return false
458
+ @msg += ["Error! File exists but I don't have permission to read it. Returning. (#{file})"]
632
459
  end
633
460
  else
634
- # We have a valid position, return the value:
635
- return @values[pos]
461
+ @msg += ["Error! The file you have supplied does not exist. Returning. (#{file})"]
636
462
  end
637
- end
463
+ end # of method open_file
638
464
 
639
465
 
640
- # Process a series of numbers to return a string containing all the numbers separated with the separator "/" between the numbers.
641
- def process_numbers(length, type, bin)
642
- size = bin.length
643
- data=""
644
- case type
645
- when "UL"
646
- temp1 = get_UL(bin)
647
- when "SL"
648
- temp1 = get_SL(bin)
649
- when "US"
650
- temp1 = get_US(bin)
651
- when "SS"
652
- temp1 = get_SS(bin)
653
- when "FD"
654
- temp1 = get_FD(bin)
655
- else
656
- @msg+=["Warning: Type "+type+"not supported in method process_numbers()."]
657
- end
658
- remain = (length-size)/size
659
- remain.times do
660
- bin = @file.read(size)
661
- case type
662
- when "UL"
663
- temp2 = get_UL(bin)
664
- when "SL"
665
- temp2 = get_SL(bin)
666
- when "US"
667
- temp2 = get_US(bin)
668
- when "SS"
669
- temp2 = get_SS(bin)
670
- when "FD"
671
- temp2 = get_FD(bin)
672
- else
673
- @msg+=["Warning: Type "+type+"not supported in method process_numbers()."]
674
- end
675
- data = temp1.to_s+"/"+temp2.to_s
676
- temp1 = data
677
- end
678
- return data
679
- end
680
-
681
- # Returns a byte integer (1 byte), from the supplied variable.
682
- def get_BYTE(bin)
683
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
684
- elements = bin.size
685
- if @endian
686
- # Native byte order:
687
- if elements > 1
688
- num=bin.unpack('C*')
689
- else
690
- num=bin.unpack('C*')[0]
691
- end
692
- else
693
- # Network byte order: (Unknown what to use here)
694
- @msg+=["Warning: Method get_BYTE not tested with this endian yet!"]
695
- if elements > 1
696
- num=bin.unpack('C*')
697
- else
698
- num=bin.unpack('C*')[0]
699
- end
700
- end
701
- return num
702
- end
703
-
704
-
705
- # Returns a unsigned short (2 bytes), from the supplied variable.
706
- def get_US(bin)
707
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
708
- elements = bin.size/2
709
- if @endian
710
- # Native byte order:
711
- if elements > 1
712
- num=bin.unpack('S*') # or v (little endian (?))
713
- else
714
- num=bin.unpack('S*')[0]
715
- end
716
- else
717
- # Network byte order:
718
- if elements > 1
719
- num=bin.unpack('n*')
720
- else
721
- num=bin.unpack('n*')[0]
722
- end
723
- end
724
- return num
725
- end
726
-
727
-
728
- # Returns a signed short (2 bytes), from the supplied variable.
729
- def get_SS(bin)
730
- elements = bin.size/2
731
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
732
- if @endian
733
- # Native byte order
734
- if elements > 1
735
- num=bin.unpack('s*')
736
- else
737
- num=bin.unpack('s*')[0]
738
- end
739
- else
740
- # Unknown what unpack code to use here:
741
- if elements > 1
742
- num=bin.unpack('s*')
743
- else
744
- num=bin.unpack('s*')[0]
745
- end
746
- @msg+=["Warning: Oppositve endian for signed short is not working yet!"]
747
- end
748
- return num
749
- end
750
-
751
-
752
- # Returns an unsigned long (4 bytes), from the supplied variable.
753
- def get_UL(bin)
754
- elements = bin.size/4
755
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
756
- if @endian
757
- # Unsigned native integer:
758
- if elements > 1
759
- num=bin.unpack('I*')
760
- else
761
- num=bin.unpack('I*')[0]
762
- end
466
+ # Changes encoding variables as the file reading proceeds past the initial 0002 group of the DICOM file.
467
+ def switch_syntax()
468
+ # The information read from the Transfer syntax tag (if present), needs to be processed:
469
+ process_transfer_syntax()
470
+ # We only plan to run this method once:
471
+ @switched = true
472
+ # Update endian, explicitness and unpack variables:
473
+ @file_endian = @rest_endian
474
+ @explicit = @rest_explicit
475
+ if @sys_endian == @file_endian
476
+ @endian = true
763
477
  else
764
- # Unsigned long in network byte order:
765
- if elements > 1
766
- num=bin.unpack('N*')
767
- else
768
- num=bin.unpack('N*')[0]
769
- end
478
+ @endian = false
770
479
  end
771
- return num
480
+ set_unpack_strings()
772
481
  end
773
482
 
774
483
 
775
- # Returns a signed long (4 bytes), from the supplied variable.
776
- def get_SL(bin)
777
- elements = bin.size/4
778
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
779
- if @endian
780
- # Signed native long integer:
781
- if elements > 1
782
- num=bin.unpack('l*')
783
- else
784
- num=bin.unpack('l*')[0]
785
- end
786
- else
787
- @msg+=["Warning: Oppositve endian for signed long is not working yet!"]
788
- if elements > 1
789
- num=bin.unpack('l*')
790
- else
791
- num=bin.unpack('l*')[0]
792
- end
793
- end
794
- return num
795
- end
484
+ # Checks the Transfer Syntax UID tag and updates class variables to prepare for correct reading of DICOM file.
485
+ # A lot of code here is duplicated in DWrite class. Should move as much of this code as possible to DLibrary I think.
486
+ def process_transfer_syntax()
487
+ ts_pos = @labels.index("0002,0010")
488
+ if ts_pos != nil
489
+ ts_value = @raw[ts_pos].unpack('a*').to_s.rstrip
490
+ valid = @lib.check_ts_validity(ts_value)
491
+ if not valid
492
+ @msg+=["Warning: Invalid/unknown transfer syntax! Will try reading the file, but errors may occur."]
493
+ end
494
+ case ts_value
495
+ # Some variations with uncompressed pixel data:
496
+ when "1.2.840.10008.1.2"
497
+ # Implicit VR, Little Endian
498
+ @rest_explicit = false
499
+ @rest_endian = false
500
+ when "1.2.840.10008.1.2.1"
501
+ # Explicit VR, Little Endian
502
+ @rest_explicit = true
503
+ @rest_endian = false
504
+ when "1.2.840.10008.1.2.1.99"
505
+ # Deflated Explicit VR, Little Endian
506
+ @msg += ["Warning: Transfer syntax 'Deflated Explicit VR, Little Endian' is untested. Unknown if this is handled correctly!"]
507
+ @rest_explicit = true
508
+ @rest_endian = false
509
+ when "1.2.840.10008.1.2.2"
510
+ # Explicit VR, Big Endian
511
+ @rest_explicit = true
512
+ @rest_endian = true
513
+ else
514
+ # For everything else, assume compressed pixel data, with Explicit VR, Little Endian:
515
+ @rest_explicit = true
516
+ @rest_endian = false
517
+ end # of case ts_value
518
+ end # of if ts_pos != nil
519
+ end # of method process_transfer_syntax
796
520
 
797
521
 
798
- # Returns a floating point double (8 bytes), from the supplied variable.
799
- def get_FD(bin)
800
- elements = bin.size/8
801
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
522
+ # Sets the unpack format strings that will be used for numbers depending on endianness of file/system.
523
+ def set_unpack_strings
802
524
  if @endian
803
- # Double in little-endian byte order:
804
- if elements > 1
805
- num=bin.unpack('E*')
806
- else
807
- num=bin.unpack('E*')[0]
808
- end
525
+ # System endian equals file endian:
526
+ # Native byte order.
527
+ @by = "C*" # Byte (1 byte)
528
+ @us = "S*" # Unsigned short (2 bytes)
529
+ @ss = "s*" # Signed short (2 bytes)
530
+ @ul = "I*" # Unsigned long (4 bytes)
531
+ @sl = "l*" # Signed long (4 bytes)
532
+ @fs = "e*" # Floating point single (4 bytes)
533
+ @fd = "E*" # Floating point double ( 8 bytes)
809
534
  else
810
- # Double in network byte order:
811
- if elements > 1
812
- num=bin.unpack('G*')
813
- else
814
- num=bin.unpack('G*')[0]
815
- end
535
+ # System endian not equal to file endian:
536
+ # Network byte order.
537
+ @by = "C*"
538
+ @us = "n*"
539
+ @ss = "n*" # Not correct (gives US)
540
+ @ul = "N*"
541
+ @sl = "N*" # Not correct (gives UL)
542
+ @fs = "g*"
543
+ @fd = "G*"
816
544
  end
817
- return num
818
545
  end
819
546
 
820
547
 
821
- # Checks the Transfer Syntax UID tag and updates class variables to prepare for correct reading of DICOM file.
822
- def process_syntax(value)
823
- ts = value.rstrip
824
- valid = @lib.check_transfer_syntax(ts)
825
- if not valid
826
- @msg+=["Warning: Invalid/unknown transfer syntax! Will try reading the file, but errors may occur."]
827
- end
828
- case ts
829
- # Some variations with uncompressed pixel data:
830
- when "1.2.840.10008.1.2"
831
- # Implicit VR, Little Endian
832
- @rest_explicit = false
833
- @rest_endian = false
834
- @compression = false
835
- when "1.2.840.10008.1.2.1"
836
- # Explicit VR, Little Endian
837
- @rest_explicit = true
838
- @rest_endian = false
839
- @compression = false
840
- when "1.2.840.10008.1.2.1.99"
841
- # Deflated Explicit VR, Little Endian
842
- @msg+=["Warning: Transfer syntax 'Deflated Explicit VR, Little Endian' is untested. Unknown if this is handled correctly!"]
843
- @rest_explicit = true
844
- @rest_endian = false
845
- @compression = false
846
- when "1.2.840.10008.1.2.2"
847
- # Explicit VR, Big Endian
848
- @rest_explicit = true
849
- @rest_endian = true
850
- @compression = false
548
+ # Initiates the variables that are used during file reading.
549
+ def init_variables()
550
+ # Variables that hold data that will be available to the DObject class.
551
+ # Arrays that will hold information from the tags of the DICOM file:
552
+ @names = Array.new()
553
+ @labels = Array.new()
554
+ @types = Array.new()
555
+ @lengths = Array.new()
556
+ @values = Array.new()
557
+ @raw = Array.new()
558
+ @levels = Array.new()
559
+ # Array that will hold any messages generated while reading the DICOM file:
560
+ @msg = Array.new()
561
+ # Variables that contain properties of the DICOM file:
562
+ # Variable to keep track of whether the image pixel data in this file are compressed or not, and if it exists at all:
563
+ # Default explicitness of start of DICOM file:
564
+ @explicit = true
565
+ # Default endianness of start of DICOM files is little endian:
566
+ @file_endian = false
567
+ # Variable used to tell whether file was read successfully or not:
568
+ @success = false
569
+
570
+ # Variables used internally when reading through the DICOM file:
571
+ # Array for keeping track of how many bytes have been read from the file up to and including each tag:
572
+ # (This is necessary for tracking the hierarchy in some DICOM files)
573
+ @integrated_lengths = Array.new()
574
+ @header_length = 0
575
+ # Array to keep track of the hierarchy of tags (this will be used to determine when a sequence or item is finished):
576
+ @hierarchy = Array.new()
577
+ @hierarchy_error = false
578
+ # Explicitness of the remaining groups after the initial 0002 group:
579
+ @rest_explicit = false
580
+ # Endianness of the remaining groups after the first group:
581
+ @rest_endian = false
582
+ # When the file switches from group 0002 to a later group we will update encoding values, and this switch will keep track of that:
583
+ @switched = false
584
+ # Use a "relationship endian" variable to guide reading of file:
585
+ if @sys_endian == @file_endian
586
+ @endian = true
851
587
  else
852
- # For everything else, assume compressed pixel data, with Explicit VR, Little Endian:
853
- @rest_explicit = true
854
- @rest_endian = false
855
- @compression = true
588
+ @endian = false
856
589
  end
590
+ # Set which format strings to use when unpacking numbers:
591
+ set_unpack_strings
592
+ # A length variable will be used at the end to check whether the last tag was read correctly, or whether the file ended unexpectedly:
593
+ @data_length = 0
594
+ # Keeping track of the tag level while reading through the file:
595
+ @current_level = 0
596
+ # This variable's string will be inserted as the length of items/sq that don't have a specified length:
597
+ @undef = "UNDEFINED"
598
+ # Items contained under the pixel data tag may contain data directly, so we need a variable to keep track of this:
599
+ @enc_image = false
857
600
  end
858
601
 
859
-
860
- # Checks the endianness of the system. Returns false if little endian, true if big endian.
861
- def check_sys_endian()
862
- x = 0xdeadbeef
863
- endian_type = {
864
- Array(x).pack("V*") => false, #:little
865
- Array(x).pack("N*") => true #:big
866
- }
867
- return endian_type[Array(x).pack("L*")]
868
- end
869
-
870
-
871
- end # End of class.
872
- end # End of module.
602
+ end # End of class
603
+ end # End of module