dicom 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,197 +1,100 @@
1
+ # Copyright 2008-2009 Christoffer Lervåg
2
+
3
+ # Some notes about this DICOM file reading class:
4
+ # In addition to reading files that are compliant to DICOM 3 Part 10,
5
+ # the philosophy of this library is to have maximum compatibility,
6
+ # and thus it will read most 'DICOM' files that deviate from the standard.
7
+ # While reading files, this class will also analyse the hierarchy of elements
8
+ # for those DICOM files that feature sequences and items, enabling the user
9
+ # to take advantage of this information for advanced querying of the
10
+ # DICOM object afterwards.
11
+
1
12
  module DICOM
2
13
  # Class for reading the data from a DICOM file:
3
14
  class DRead
4
15
 
5
- attr_reader :success
16
+ attr_reader :success,:names,:labels,:types,:lengths,:values,:raw,:levels,:explicit,:file_endian,:msg
6
17
 
7
18
  # Initialize the DRead instance.
8
- def initialize(file_name=nil, lib=nil)
9
- @a=0
10
- @b=0
11
- # Variables that hold data that will be returned to the person/procedure using this class:
12
- # Arrays that will hold information from the DICOM file:
13
- @names = Array.new()
14
- @labels = Array.new()
15
- @types = Array.new()
16
- @lengths = Array.new()
17
- @values = Array.new()
18
- @raw = Array.new()
19
- @levels = Array.new()
20
- # Keeping track of how many bytes have been read from the file up to and including each tag:
21
- # This is necessary for tracking the hiearchy in some DICOM files.
22
- @integrated_lengths = Array.new()
23
- @header_length = 0
24
- # Keep track of the hierarchy of tags (this will be used to determine when a sequence or item is finished):
25
- @hierarchy = Array.new()
26
- @hierarchy_error = false
27
- # Array that will holde any messages generated while reading the DICOM file:
28
- @msg = Array.new()
29
- # Explicitness (explicit (true) by default):
30
- @explicit = true
31
- # Explicitness of the remaining groups after the first group:
32
- @rest_explicit = true
33
- # Variable to keep track of whether the current sequence being read have length specified or not:
34
- @sq_length = false
35
- # Variable to keep track of whether the image pixel data in this file are compressed or not, and if it exists at all:
36
- @compression = false
37
- # Pixel data is color or greyscale?
38
- @color = false
39
- # Default endianness of start of DICOM files is little endian:
40
- @file_endian=false
19
+ def initialize(file_name=nil, opts={})
20
+ # Process option values, setting defaults for the ones that are not specified:
21
+ @lib = opts[:lib] || DLibrary.new
22
+ @sys_endian = opts[:sys_endian] || false
41
23
 
42
- # Variables used internally when reading the dicom file:
43
- # If tag does not exist in the library it is unknown:
44
- @unknown = false
45
- # Check endianness of the system (false if little endian):
46
- @sys_endian=check_sys_endian()
47
- # Endianness of the remaining groups after the first group:
48
- @rest_endian=false
49
- # Use a "relationship endian" variable to guide reading of file (true if they are equal):
50
- if @sys_endian == @file_endian
51
- @endian = true
52
- else
53
- @endian = false
54
- end
55
- # A length variable will be used at the end to check whether the last tag was read correctly, or whether the file endend unexpectedly:
56
- @data_length = 0
57
- # Variable used to tell whether file was read succesfully or not:
58
- @success = false
59
- # Keeping track of the tag level while reading through the file:
60
- @current_level = 0
61
-
62
- # Open file for binary reading:
63
- begin
64
- @file = File.new(file_name, "rb")
65
- rescue
66
- @msg += ["Error! Could not open file: "+ file_name + " Returning."]
67
- return
68
- end
69
-
70
- # Load the library class (DICOM dictionary):
71
- if lib != nil
72
- # Library already specified by user:
73
- @lib = lib
74
- else
75
- @lib = DLibrary.new()
76
- end
24
+ # Initiate the variables that are used during file reading:
25
+ init_variables()
26
+
27
+ # Test if file is readable and open it to the @file variable:
28
+ open_file(file_name)
77
29
 
78
30
  # Read the initial header of the file:
79
- header=check_header()
80
- if header == false
81
- @file.close()
82
- @file = File.new(file_name, "rb")
83
- @header_length = 0
84
- elsif header == nil
85
- # Reading the file did not succeed, and we need to abort.
86
- @msg += ["Error! Could not read: "+ file_name + " It might be a directory. Returning."]
31
+ if @file == nil
32
+ # File is not readable, so we return:
87
33
  return
34
+ else
35
+ # Read and verify the DICOM header:
36
+ header = check_header()
37
+ # If there was no header, we will attempt to read tags from the very start of the file:
38
+ if header == false
39
+ @file.close()
40
+ @file = File.new(file_name, "rb")
41
+ @header_length = 0
42
+ end
88
43
  end
89
-
90
- # Initiate the process to read tags:
44
+
45
+ # Run a loop to read the tags:
46
+ # (Tag information is stored in arrays by the method process_tag)
91
47
  tag = true
92
- temp_check=true
93
- while tag != false and temp_check== true do
94
- tag=process_tag()
95
- # Store the tag information in arrays:
96
- if tag != false
97
- @names+=[tag[0]]
98
- @labels+=[tag[1]]
99
- @types+=[tag[2]]
100
- @lengths+=[tag[3]]
101
- @values+=[tag[4]]
102
- @raw+=[tag[5]]
103
- end
48
+ while tag != false do
49
+ tag = process_tag()
104
50
  end
105
- @success = true
106
- # Check the status of the pixel data:
107
- check_pixel_status()
108
- # Index of last element in tag arrays:
109
- @lastIndex=@names.length-1
51
+
52
+ # Post processing:
110
53
  # Close the file as we are finished reading it:
111
54
  @file.close()
112
- # Check if the last tag was read out correctly (that the length of its data corresponds to that expected by the length specified in the DICOM file):
113
- if @data_length != @lengths[@lastIndex]
114
- @msg += ["Error! The data content read from file does not match the length specified for the tag "+ @labels[@lastIndex] + ". It seems this is not a valid DICOM file. Returning."]
115
- @success = false
116
- return
55
+ # Assume file has been read successfully:
56
+ @success = true
57
+ # Check if the last tag was read out correctly (that the length of its data (@raw.last.length)
58
+ # corresponds to that expected by the length specified in the DICOM file (@lengths.last)).
59
+ # We only run this test if the last tag has a positive expectation value, obviously.
60
+ if @lengths.last.to_i > 0
61
+ if @raw.last.length != @lengths.last
62
+ @msg += ["Error! The data content read from file does not match the length specified for the tag #{@labels.last}. It seems this is either an invalid or corrupt DICOM file. Returning."]
63
+ @success = false
64
+ return
65
+ end
117
66
  end
118
- end
67
+ end # of method initialize
119
68
 
120
69
 
121
- # Returns the relevant information gathered from the read dicom procedure.
122
- def return_data()
123
- return [@names,@labels,@types,@lengths,@values,@raw,@levels,@compression,@color,@explicit, @file_endian, @msg]
124
- end
70
+ # Following methods are private:
71
+ private
125
72
 
126
73
 
127
74
  # Checks the initial header of the DICOM file.
128
75
  def check_header()
129
76
  # According to the official DICOM standard, a DICOM file shall contain 128
130
- # consequtive zero bytes followed by 4 bytes that spell the string 'DICM'.
77
+ # consequtive (zero) bytes followed by 4 bytes that spell the string 'DICM'.
131
78
  # Apparently, some providers seems to skip this in their DICOM files.
132
- # First 128 bytes should be zeroes:
133
- begin
134
- bin1=@file.read(128)
135
- @header_length += 128
136
- rescue
137
- # The file could not be read. Most likely because the file name variable supplied to this instance was in fact a directory.
138
- return nil
139
- end
140
- str_header1=bin1.unpack('a' * 128).to_s
79
+ bin1 = @file.read(128)
80
+ @header_length += 128
81
+ #filler = bin1.unpack('a' * 128).to_s
141
82
  # Next 4 bytes should spell 'DICM':
142
- bin2=@file.read(4)
83
+ bin2 = @file.read(4)
143
84
  @header_length += 4
144
- str_header2=bin2.unpack('a' * 4).to_s
145
- # If we dont have this expected header, we will still try to read it is a DICOM file.
146
- if str_header2 != 'DICM' then
147
- @msg+=["Warning: The specified file does not contain the official DICOM header."]
148
- @msg+=["Will try to read the file anyway, as some sources are known to skip the formal DICOM header."]
149
- # Some DICOM files skips group 2, which defines the structure of the DICOM file.
150
- # This has only been observed in files that also skips the above part of the DICOM header.
151
- # Check for skipped group 0002:
152
- group_label=bin1.unpack('h4').to_s.reverse.upcase
153
- if (group_label.include? "2")
154
- #Assume the file starts with a group 0002 tag, as "normal".
155
- # Assume a default transfer syntax: Implicit, Little Endian.
156
- @explicit = false
157
- @rest_explicit = false
158
- @file_endian = false
159
- @rest_endian = false
160
- @compression = false
161
- else
162
- # Assume a default transfer syntax: Implicit, Little Endian.
163
- # (Turns out I use the same settings as above, which makes this somewhat silly, but I'll leave it like this for now in case of any changes later)
164
- @explicit = false
165
- @rest_explicit = false
166
- @file_endian = false
167
- @rest_endian = false
168
- @compression = false
169
- @msg+=["Warning: Group '0002' Transfer Syntax does not exist. Assuming Implicit VR, Little Endian."]
170
- end
85
+ dicm = bin2.unpack('a' * 4).to_s
86
+ if dicm != 'DICM' then
87
+ # Header is not valid (we will still try to read it is a DICOM file though):
88
+ @msg += ["Warning: The specified file does not contain the official DICOM header. Will try to read the file anyway, as some sources are known to skip this header."]
89
+ # As the file is not conforming to the DICOM standard, it is possible that it does not contain a
90
+ # transfer syntax tag, and as such, we attempt to choose the most probable encoding values here:
91
+ @explicit = false
171
92
  return false
172
93
  else
94
+ # Header is valid:
173
95
  return true
174
96
  end
175
- end
176
-
177
-
178
- # Checks the status of the pixel data that has been read from the DICOM file: whether it exists at all and if its greyscale or color.
179
- # Modifies instance variable @color if color image is detected and instance variable @compression if no pixel data is detected.
180
- def check_pixel_status()
181
- # Check if pixel data is present:
182
- pixel_pos = @labels.index("7FE0,0010")
183
- if pixel_pos == nil
184
- @compression = nil
185
- return
186
- end
187
- # Check for color image:
188
- col_string = get_value("0028,0004")
189
- if col_string != false
190
- if (col_string.include? "RGB") or (col_string.include? "COLOR") or (col_string.include? "COLOUR")
191
- @color = true
192
- end
193
- end
194
- end
97
+ end # of method check_header
195
98
 
196
99
 
197
100
  # Governs the process of reading tags in the DICOM file.
@@ -199,106 +102,73 @@ module DICOM
199
102
  #going on here in all cases. Perhaps some day I will get the courage to have a go at it again.)
200
103
  def process_tag()
201
104
  #STEP 1: ------------------------------------------------------
202
- # Read the tag label, but do not continue if the method signals that we have reached end of file:
203
- label=read_label()
105
+ # Attempt to read tag label, but abort if we have reached end of file:
106
+ label = read_label()
204
107
  if label == false
108
+ # End of file, no more tags.
205
109
  return false
206
- end
207
- # Retrieve the tag name and type based on the label we have read from file:
110
+ end
111
+ # STEP 2: ------------------------------------------------------
112
+ # Access library to retrieve the tag name and VR from the label we have read:
208
113
  lib_data = @lib.get_name_vr(label)
209
114
  name = lib_data[0]
210
115
  vr = lib_data[1]
211
- if vr == "UN"
212
- @unknown = true
213
- else
214
- @unknown = false
215
- end
216
- # STEP 2: ----------------------------------------------------
217
- # Continue reading the tag information: Byte type and length.
116
+ # (Note: VR will be overwritten if the DICOM file contains VR)
117
+
118
+ # STEP 3: ----------------------------------------------------
119
+ # Read tag VR (if it exists) and the length value:
218
120
  tag_info = read_type_length(vr,label)
219
121
  type = tag_info[0]
122
+ level_type = type
220
123
  length = tag_info[1]
221
- # For sequence type tag, check if the tag have length specified:
222
- if type == "SQ"
223
- if length == "UNDEFINED" or length.to_i == 0
224
- @sq_length = false
225
- else
226
- @sq_length = true
227
- end
228
- end
229
- # If length is undefined, do not continue to read tag data:
230
- if length == "UNDEFINED"
231
- if label == "7FE0,0010"
232
- data = "(Encapsulated pixel data)"
233
- name = "Encapsulated image(s)"
234
- type = "SQ"
235
- elsif type == "SQ" or type == "()"
236
- # Do not change name of tag.
237
- data = "(Encapsulated tags)"
238
- else
239
- data = "(Encapsulated data)"
240
- name = "Encapsulated information"
241
- end
242
- # Set hiearchy level:
243
- set_level(type, length, label)
244
- return [name,label,type,length,data]
245
- end
246
- # Add the length of the content of the tag to the last element in the integrated_lengths array:
247
- # (but not if it is a sequence or item, as in this case the length of the tag is its sub-tags)
248
- if length.to_i != 0 and type != "SQ" and type != "()"
249
- @integrated_lengths[@integrated_lengths.size-1] += length
250
- end
251
- # Set hiearchy level:
252
- set_level(type, length, label)
253
- # Some special handling for item related tags, which may result in returning without reading data:
254
- if type == "()"
255
- # If length is zero, just return:
256
- if length == 0
257
- type = ""
258
- data = nil
259
- @data_length = 0
260
- return [name,label,type,length,data]
261
- else
262
- # If there is content, this may, in the case of an image, be the image data.
263
- # Must insert the image's type here.
264
- # Some times when this tag has a length, it does not have content in itself, but instead
265
- # have content in a number of subtags.
266
- if @sq_length != true
267
- # Treat the item as containing image data:
268
- type = "OW" # A more general approach should be implemented here.
269
- # For this special case, where item contains the data itself, instead of in sub-tags,
270
- # we declare that there is to be no sub-level after all.
271
- # This handling is not particularly obvious or elegant, and perhaps in the future I will
272
- # be able to rewrite this whole process_tag method to something more sane.
273
- @current_level = @current_level - 1
274
- end
275
- end
276
- end
277
- # STEP 3: ----------------------------------------
278
- # Finally read the tag data.
279
- tag_data = read_data(type,length)
280
- value = tag_data[0]
281
- raw = tag_data[1]
282
- # Check for the Transfer Syntax UID tag, and process it:
283
- if label == "0002,0010"
284
- process_syntax(value)
285
- end
286
- if type == "SQ" or type == "()"
287
- @data_length = length # To avoid false errors. In time perhaps a better way of handling this will be found.
124
+
125
+ # STEP 4: ----------------------------------------
126
+ # Reading value of tag.
127
+ # Special handling needed for items in encapsulated image data:
128
+ if @enc_image and label == "FFFE,E000"
129
+ # The first item appearing after the image tag is a 'normal' item, the rest hold image data.
130
+ # Note that the first item will contain data if there are multiple images, and so must be read.
131
+ type = "OW" # how about alternatives like OB?
132
+ # Modify name of item if this is an item that holds pixel data:
133
+ if @labels.last != "7FE0,0010"
134
+ name = "Pixel Data Item"
135
+ end
136
+ end
137
+ # Read the value of the tag (if it contains data, and it is not a sequence or ordinary item):
138
+ if length.to_i > 0 and type != "SQ" and type != "()"
139
+ # Read the tag data:
140
+ tag_data = read_data(type,length)
141
+ value = tag_data[0]
142
+ raw = tag_data[1]
288
143
  else
289
- @data_length = raw.length
290
- end
291
- return [name,label,type,length,value,raw]
292
- end
293
- # END READ TAG
144
+ # No tag data.
145
+ # Special case: Check if pixel data tag is sequenced:
146
+ if label == "7FE0,0010"
147
+ # Change name and type of pixel data tag if it does not contain data itself:
148
+ name = "Encapsulated Pixel Data"
149
+ level_type = "SQ"
150
+ @enc_image = true
151
+ end
152
+ end # of if length.to_i > 0
153
+ # Set the hiearchy level of this tag:
154
+ set_level(level_type, length, label, name)
155
+ # Transfer the gathered data to arrays and return true:
156
+ @names += [name]
157
+ @labels += [label]
158
+ @types += [type]
159
+ @lengths += [length]
160
+ @values += [value]
161
+ @raw += [raw]
162
+ return true
163
+ end # of method process_tag
294
164
 
295
165
 
296
166
  # Reads and returns TAG LABEL (4 first bytes of tag).
297
167
  def read_label()
298
- bin1=@file.read(2)
299
- bin2=@file.read(2)
300
- # Check if we have reached end of file before proceeding:
301
- if bin1 == nil or bin2 == nil
168
+ bin1 = @file.read(2)
169
+ bin2 = @file.read(2)
170
+ # Do not proceed if we have reached end of file:
171
+ if bin2 == nil
302
172
  return false
303
173
  end
304
174
  # Add the length of the tag label. If this was the first label read from file, we need to add the header length too:
@@ -310,236 +180,195 @@ module DICOM
310
180
  @integrated_lengths += [@integrated_lengths[@integrated_lengths.length-1] + 4]
311
181
  end
312
182
  # Unpack the blobs:
313
- label1=bin1.unpack('h*').to_s.reverse.upcase
314
- label2=bin2.unpack('h*').to_s.reverse.upcase
315
- # Special treatment of tags that are of the first "0002" group:
183
+ label1 = bin1.unpack('h*').to_s.reverse.upcase
184
+ label2 = bin2.unpack('h*').to_s.reverse.upcase
185
+ # Whether DICOM file is big or little endian, the first 0002 group is always little endian encoded.
186
+ # In case of big endian system:
316
187
  if @sys_endian
317
- # Rearrange the numbers:
188
+ # Rearrange the numbers (# This has never been tested btw.):
318
189
  label1 = label1[2..3]+label1[0..1]
319
190
  label2 = label2[2..3]+label2[0..1]
320
- # Has this been verified? Suspect unintended consequence.
321
191
  end
322
- # Process the label, by considering the endian-ness relationship, if are past the initial "0002" group:
323
- if label1 != "0002"
324
- # As we are past the initial little endian part of the file, update the file properties:
325
- @file_endian = @rest_endian
326
- @explicit = @rest_explicit
327
- #Update the endian-relationship variable:
328
- if @sys_endian == @file_endian
329
- @endian = true
330
- else
331
- @endian = false
332
- end
333
- # Do we need to rearrange?
334
- if @endian
335
- # No action needed
336
- else
337
- # Need to rearrange the first and second part of each string:
338
- label1 = label1[2..3]+label1[0..1]
339
- label2 = label2[2..3]+label2[0..1]
340
- end
192
+ # When we shift from group 0002 to another group we need to update our endian/explicitness variables:
193
+ if label1 != "0002" and @switched == false
194
+ switch_syntax()
341
195
  end
342
- # Join the label group and label element together to the final string:
343
- label=label1+","+label2
344
- # Return the label:
345
- return label
346
- end
347
- # END TAG LABEL
196
+ # Perhaps we need to rearrange the labels?:
197
+ if not @endian
198
+ # Need to rearrange the first and second part of each string:
199
+ label1 = label1[2..3]+label1[0..1]
200
+ label2 = label2[2..3]+label2[0..1]
201
+ end
202
+ # Join the label group and label element together to the final string and return:
203
+ return label1+","+label2
204
+ end # of method read_label
348
205
 
349
206
 
350
207
  # Reads and returns TAG TYPE (2 bytes) and TAG LENGTH (Varying length).
351
208
  def read_type_length(type,label)
352
- # Structure will differ, dependent on whether we have explicit or implicit type of file:
353
- # EXPLICIT:
209
+ # Structure will differ, dependent on whether we have explicit or implicit encoding:
210
+ # *****EXPLICIT*****:
354
211
  if @explicit == true
355
- # It seems we need to have a special case for item labels in the explicit scenario:
356
- if label == "FFFE,E000" or label == "FFFE,E00D" or label == "FFFE,E0DD"
357
- bin=@file.read(4)
358
- @integrated_lengths[@integrated_lengths.length-1] += 4
359
- length = get_SL(bin)
360
- else
212
+ # Step 1: Read VR (if it exists)
213
+ unless label == "FFFE,E000" or label == "FFFE,E00D" or label == "FFFE,E0DD"
361
214
  # Read tag type field (2 bytes - since we are not dealing with an item related tag):
362
- bin=@file.read(2)
215
+ bin = @file.read(2)
363
216
  @integrated_lengths[@integrated_lengths.length-1] += 2
364
- type=bin.unpack('a*').to_s
217
+ type = bin.unpack('a*').to_s
365
218
  end
366
- # Two (three) possible structures for value length here, dependent on tag type:
219
+ # Step 2: Read length
220
+ # Three possible structures for value length here, dependent on tag type:
367
221
  case type
368
- when "OB","OW","SQ","UN"
369
- # Two empty bytes should occur here, according to the standard:
370
- bin=@file.read(2)
371
- @integrated_lengths[@integrated_lengths.length-1] += 2
372
- # Read value length (4 bytes):
373
- bin=@file.read(4)
374
- @integrated_lengths[@integrated_lengths.length-1] += 4
375
- length=get_SL(bin)
376
- when "()"
377
- #An empty entry for the item related tags (As it has already been processed).
378
- else
379
- # For all the other tag types: Read value length (2 bytes):
380
- bin=@file.read(2)
381
- @integrated_lengths[@integrated_lengths.length-1] += 2
382
- length=get_US(bin)
222
+ when "OB","OW","SQ","UN"
223
+ # 6 bytes total:
224
+ # Two empty first:
225
+ bin = @file.read(2)
226
+ @integrated_lengths[@integrated_lengths.length-1] += 2
227
+ # Value length (4 bytes):
228
+ bin = @file.read(4)
229
+ @integrated_lengths[@integrated_lengths.length-1] += 4
230
+ length = bin.unpack(@ul)[0]
231
+ when "()"
232
+ # 4 bytes:
233
+ # For labels "FFFE,E000", "FFFE,E00D" and "FFFE,E0DD"
234
+ bin = @file.read(4)
235
+ @integrated_lengths[@integrated_lengths.length-1] += 4
236
+ length = bin.unpack(@ul)[0]
237
+ else
238
+ # 2 bytes:
239
+ # For all the other tag types, value length is 2 bytes:
240
+ bin = @file.read(2)
241
+ @integrated_lengths[@integrated_lengths.length-1] += 2
242
+ length = bin.unpack(@us)[0]
383
243
  end
384
244
  else
385
- #IMPLICIT:
386
- # Read value length (4 bytes):
387
- bin=@file.read(4)
245
+ # *****IMPLICIT*****:
246
+ # No VR (retrieved from library based on the tag's label)
247
+ # Reading value length (4 bytes):
248
+ bin = @file.read(4)
388
249
  @integrated_lengths[@integrated_lengths.length-1] += 4
389
- length = get_SL(bin)
250
+ length = bin.unpack(@ul)[0]
390
251
  end
391
252
  # For encapsulated data, the tag length will not be defined. To convey this,
392
- # the hex sequence 'ff ff ff ff' is used (-1 converted to signed long).
393
- if length == -1
394
- length = "UNDEFINED"
253
+ # the hex sequence 'ff ff ff ff' is used (-1 converted to signed long, 4294967295 converted to unsigned long).
254
+ if length == 4294967295
255
+ length = @undef
395
256
  elsif length%2 >0
396
257
  # According to the DICOM standard, all tag lengths should be an even number.
397
258
  # If it is not, it may indicate a file that is not standards compliant or it might even not be a DICOM file.
398
- @msg+=["Warning: Odd number of bytes in tag length occured. This is a violation of the DICOM standard, but program will attempt to continue reading the rest of the file."]
259
+ @msg += ["Warning: Odd number of bytes in tag length occured. This is a violation of the DICOM standard, but program will still attempt to read the rest of the file."]
399
260
  end
400
- return [type,length]
401
- end
402
- # END BYTE TYPE and TAG LENGTH
261
+ return [type, length]
262
+ end # of method read_type_length
403
263
 
404
264
 
405
- # Reads and returns TAG DATA (Varying length - determined at an earlier stage).
265
+ # Reads and returns TAG DATA (Of varying length - which is determined at an earlier stage).
406
266
  def read_data(type, length)
407
- # Treatment dependent on what type of information we are dealing with.
267
+ # Read the data:
268
+ bin = @file.read(length)
269
+ @integrated_lengths[@integrated_lengths.size-1] += length
270
+ # Decoding of content will naturally depend on what kind of content (VR) we have.
408
271
  case type
409
272
 
410
- # Normally these numbers tags will contain just one number, but in some cases,
411
- # they contain multiple numbers. In such cases we will read each number and store
412
- # them all in a string separated by "/".
413
- # Unsigned long: (4 bytes)
414
- when "UL"
415
- bin = @file.read(length)
416
- if length <= 4
417
- data = get_UL(bin)
418
- else
419
- data = process_numbers(length, type, bin)
420
- end
421
-
422
- # Signed long: (4 bytes)
423
- when "SL"
424
- bin = @file.read(length)
425
- if length <= 4
426
- data = get_SL(bin)
427
- else
428
- data = process_numbers(length, type, bin)
429
- end
273
+ # Normally the "number tags" will contain just one number, but in some cases,
274
+ # they contain multiple numbers. In such cases we will read each number and store
275
+ # them all in a string separated by "/".
276
+ # Unsigned long: (4 bytes)
277
+ when "UL"
278
+ if length <= 4
279
+ data = bin.unpack(@ul)[0]
280
+ else
281
+ data = bin.unpack(@ul).join("/")
282
+ end
430
283
 
431
- # Unsigned short: (2 bytes)
432
- when "US"
433
- bin = @file.read(length)
434
- if length <= 2
435
- data = get_US(bin)
436
- else
437
- data = process_numbers(length, type, bin)
438
- end
284
+ # Signed long: (4 bytes)
285
+ when "SL"
286
+ if length <= 4
287
+ data = bin.unpack(@sl)[0]
288
+ else
289
+ data = bin.unpack(@sl).join("/")
290
+ end
439
291
 
440
- # Signed short: (2 bytes)
441
- when "SS"
442
- bin = @file.read(length)
443
- if length <= 2
444
- data = get_SS(bin)
445
- else
446
- data = process_numbers(length, type, bin)
447
- end
292
+ # Unsigned short: (2 bytes)
293
+ when "US"
294
+ if length <= 2
295
+ data = bin.unpack(@us)[0]
296
+ else
297
+ data = bin.unpack(@us).join("/")
298
+ end
448
299
 
449
- # Floating point double: (8 bytes)
450
- when "FD"
451
- bin = @file.read(length)
452
- if length <= 8
453
- data = get_FD(bin)
454
- else
455
- data = process_numbers(length, type, bin)
456
- end
300
+ # Signed short: (2 bytes)
301
+ when "SS"
302
+ if length <= 2
303
+ data = bin.unpack(@ss)[0]
304
+ else
305
+ data = bin.unpack(@ss).join("/")
306
+ end
457
307
 
458
- # Unknown information, header element is not recognised from local database:
459
- when "UN"
460
- bin=@file.read(length)
461
- data=bin.unpack('H*')[0]
462
-
463
- # A tag that contains items/elements (sequence of elements):
464
- when "SQ"
465
- # The tag has no content in itself, the file starts directly on a new tag adress.
466
- data="(Sequence of Elements)"
467
-
468
- # Item tag:
469
- when "()"
470
- # Tag may have a length, but no content belonging to this tag itself. They are to be read
471
- # for this item's subtags.
472
- data = "(Sequence of Tags)"
473
-
474
- # The tag contains a tag adress (4 bytes):
475
- when "AT"
476
- if length != 4
477
- @msg+=["Warning: Unexpected tag length, expected 4 bytes for tag type 'AT'!"]
478
- end
479
- temp=Array.new(4)
480
- 4.times do |i|
481
- bin=@file.read(1)
482
- temp[i]=bin.unpack('H*')[0]
483
- end
484
- # Put together, mix up the order to get it correct:
485
- data=temp[1].to_s+temp[0].to_s+"."+temp[3].to_s+temp[2].to_s
486
- # This has not been tested with other than Little endian system/file:
487
- if @file_endian or @system_endian
488
- @msg+=["Warning: Handling for tag type 'AT' has not been verified for other than default endianness."]
489
- end
308
+ # Floating point single: (4 bytes)
309
+ when "FL"
310
+ if length <= 4
311
+ data = bin.unpack(@fs)[0]
312
+ else
313
+ data = bin.unpack(@fs).join("/")
314
+ end
490
315
 
491
- # Binary data, used sometimes when we have encapsulated images:
492
- when "OB"
493
- bin=@file.read(length)
494
- data=bin.unpack('H*')[0]
495
-
496
- # Image data:
497
- when "OW"
498
- # We need to know what kind of bith depth the pixel data is saved with:
499
- bit_depth=get_value('0028,0100')
500
- # Proceed to read the image binary data:
501
- bin=@file.read(length)
502
- # Number of bytes used per pixel will determine how to unpack this:
503
- case bit_depth
504
- when 8
505
- data=get_BYTE(bin) # Byte/Character/Fixnum (1 byte)
506
- when 16
507
- data=get_US(bin) # Unsigned short (2 bytes)
508
- when 12
509
- # 12 BIT SIMPLY NOT WORKING YET!
510
- # This one is a bit more tricky to extract.
511
- # I havent really given this priority so far as 12 bit image data is rather rare.
512
- @msg+=["Warning: Bit depth 12 is not working correctly at this time!"]
513
- data=Array.new(length)
514
- (length).times do |i|
515
- hex=bin.unpack('H3')
516
- hex4="0"+hex[0]
517
- num=hex[0].unpack('v')
518
- data[i]=num
316
+ # Floating point double: (8 bytes)
317
+ when "FD"
318
+ if length <= 8
319
+ data = bin.unpack(@fd)[0]
320
+ else
321
+ data = bin.unpack(@fd).join("/")
519
322
  end
520
- else
521
- raise "Bit depth "+bit_depth.to_s+" has not received implementation in this procedure yet."
522
- end
523
323
 
524
- # For everything else, assume string type information:
525
- when 'AE','AS','CS','DA','DS','DT','IS','LO','LT','PN','SH','ST','TM','UI','UT' #,'VR'
526
- bin=@file.read(length)
527
- data=bin.unpack('a*').to_s
528
- else
529
- @msg+=["Warning: Tag type "+type+" does not have a reading method assigned to it. Please update the source code."]
530
- bin=@file.read(length)
531
- data=bin.unpack('H*')[0]
532
- end
324
+ # The tag contains a tag label (4 bytes):
325
+ when "AT"
326
+ # Bytes read in following order: 1 0 , 3 2 (And Hex nibbles read in this order: Hh)
327
+ # NB! This probably needs to be modified when dealing with something other than little endian.
328
+ # Tag label is unpacked to a string in the format GGGGEEEE.
329
+ data = (bin.unpack("xHXhX2HXh").join + bin.unpack("x3HXhX2HXh").join).upcase
330
+ #data = (bin.unpack("xHXhX2HXh").join + "," + bin.unpack("x3HXhX2HXh").join).upcase
331
+
332
+ # We have a number of VRs that are decoded as string:
333
+ when 'AE','AS','CS','DA','DS','DT','IS','LO','LT','PN','SH','ST','TM','UI','UT' #,'VR'
334
+ data = bin.unpack('a*').to_s
335
+
336
+ # NB!
337
+ # FOLLOWING TAG TYPES WILL NOT BE DECODED.
338
+ # DECODING OF PIXEL DATA IS MOVED TO DOBJECT FOR PERFORMANCE REASONS.
339
+
340
+ # Unknown information, header element is not recognised from local database:
341
+ when "UN"
342
+ #data=bin.unpack('H*')[0]
343
+
344
+ # Other byte string, 1-byte integers
345
+ when "OB"
346
+ #data = bin.unpack('H*')[0]
347
+
348
+ # Other float string, 4-byte floating point numbers
349
+ when "OF"
350
+ # NB! This tag type has not been tested yet with an actual DICOM file.
351
+ #data = bin.unpack(@fs)
352
+
353
+ # Image data:
354
+ # Other word string, 2-byte integers
355
+ when "OW"
356
+ # empty
357
+
358
+ # Unknown VR:
359
+ else
360
+ @msg += ["Warning: Tag type #{type} does not have a reading method assigned to it. Please contact the author."]
361
+ #data = bin.unpack('H*')[0]
362
+ end # of case type
533
363
 
534
364
  # Return the data:
535
- return [data,bin]
536
- end
537
- # END TAG DATA
365
+ return [data, bin]
366
+ end # of method read_data
538
367
 
539
368
 
540
369
  # Sets the level of the current tag in the hiearchy.
541
370
  # The default (top) level is zero.
542
- def set_level(type, length, label)
371
+ def set_level(type, length, label, name)
543
372
  # Set the level of this tag:
544
373
  @levels += [@current_level]
545
374
  # Determine if there is a level change for the following tag:
@@ -550,7 +379,7 @@ module DICOM
550
379
  # not in its sub-tags, we should not increase the level. (This is fixed in the process_tag method.)
551
380
  if type == "SQ"
552
381
  increase = true
553
- elsif label =="FFFE,E000"
382
+ elsif name == "Item"
554
383
  increase = true
555
384
  else
556
385
  increase = false
@@ -574,9 +403,11 @@ module DICOM
574
403
  # If it is an array (of length and position), then we need to check the integrated_lengths array
575
404
  # to see if the current sub-level has expired.
576
405
  if @hierarchy.size > 0
577
- check_level_end()
406
+ # Do not perform this check for Pixel Data Items or Sequence Delimitation Items:
407
+ # (If performed, it will give false errors for the case when we have Encapsulated Pixel Data)
408
+ check_level_end() unless name == "Pixel Data Item" or label == "FFFE,E0DD"
578
409
  end
579
- end
410
+ end # of method set_level
580
411
 
581
412
 
582
413
  # Checks how far we've read in the DICOM file to determine if we have reached a point
@@ -607,266 +438,166 @@ module DICOM
607
438
  end
608
439
  end
609
440
  end
610
- end
441
+ end # of method check_level_end
611
442
 
612
443
 
613
- # Returns the (processed) value of a DICOM tag based on an input tag label, category name or array index.
614
- def get_value(id)
615
- # Assume we have been fed a tag label:
616
- pos=@labels.index(id)
617
- # If this does not give a hit, assume we have been fed a tag name:
618
- if pos==nil
619
- pos=@names.index(id)
620
- end
621
- # If we still dont have a hit, check if it is a valid number within the array range:
622
- if pos == nil
623
- if (id.is_a? Integer)
624
- if id >= 0 and id <= @lastIndex
625
- # The id supplied is a valid position, return its corresponding value:
626
- return @values[id]
444
+ # Tests if the file is readable and opens it.
445
+ def open_file(file)
446
+ if File.exist?(file)
447
+ if File.readable?(file)
448
+ if not File.directory?(file)
449
+ if File.size(file) > 8
450
+ @file = File.new(file, "rb")
451
+ else
452
+ @msg += ["Error! File is too small to contain DICOM information. Returning. (#{file})"]
453
+ end
627
454
  else
628
- return false
455
+ @msg += ["Error! File is a directory. Returning. (#{file})"]
629
456
  end
630
457
  else
631
- return false
458
+ @msg += ["Error! File exists but I don't have permission to read it. Returning. (#{file})"]
632
459
  end
633
460
  else
634
- # We have a valid position, return the value:
635
- return @values[pos]
461
+ @msg += ["Error! The file you have supplied does not exist. Returning. (#{file})"]
636
462
  end
637
- end
463
+ end # of method open_file
638
464
 
639
465
 
640
- # Process a series of numbers to return a string containing all the numbers separated with the separator "/" between the numbers.
641
- def process_numbers(length, type, bin)
642
- size = bin.length
643
- data=""
644
- case type
645
- when "UL"
646
- temp1 = get_UL(bin)
647
- when "SL"
648
- temp1 = get_SL(bin)
649
- when "US"
650
- temp1 = get_US(bin)
651
- when "SS"
652
- temp1 = get_SS(bin)
653
- when "FD"
654
- temp1 = get_FD(bin)
655
- else
656
- @msg+=["Warning: Type "+type+"not supported in method process_numbers()."]
657
- end
658
- remain = (length-size)/size
659
- remain.times do
660
- bin = @file.read(size)
661
- case type
662
- when "UL"
663
- temp2 = get_UL(bin)
664
- when "SL"
665
- temp2 = get_SL(bin)
666
- when "US"
667
- temp2 = get_US(bin)
668
- when "SS"
669
- temp2 = get_SS(bin)
670
- when "FD"
671
- temp2 = get_FD(bin)
672
- else
673
- @msg+=["Warning: Type "+type+"not supported in method process_numbers()."]
674
- end
675
- data = temp1.to_s+"/"+temp2.to_s
676
- temp1 = data
677
- end
678
- return data
679
- end
680
-
681
- # Returns a byte integer (1 byte), from the supplied variable.
682
- def get_BYTE(bin)
683
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
684
- elements = bin.size
685
- if @endian
686
- # Native byte order:
687
- if elements > 1
688
- num=bin.unpack('C*')
689
- else
690
- num=bin.unpack('C*')[0]
691
- end
692
- else
693
- # Network byte order: (Unknown what to use here)
694
- @msg+=["Warning: Method get_BYTE not tested with this endian yet!"]
695
- if elements > 1
696
- num=bin.unpack('C*')
697
- else
698
- num=bin.unpack('C*')[0]
699
- end
700
- end
701
- return num
702
- end
703
-
704
-
705
- # Returns a unsigned short (2 bytes), from the supplied variable.
706
- def get_US(bin)
707
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
708
- elements = bin.size/2
709
- if @endian
710
- # Native byte order:
711
- if elements > 1
712
- num=bin.unpack('S*') # or v (little endian (?))
713
- else
714
- num=bin.unpack('S*')[0]
715
- end
716
- else
717
- # Network byte order:
718
- if elements > 1
719
- num=bin.unpack('n*')
720
- else
721
- num=bin.unpack('n*')[0]
722
- end
723
- end
724
- return num
725
- end
726
-
727
-
728
- # Returns a signed short (2 bytes), from the supplied variable.
729
- def get_SS(bin)
730
- elements = bin.size/2
731
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
732
- if @endian
733
- # Native byte order
734
- if elements > 1
735
- num=bin.unpack('s*')
736
- else
737
- num=bin.unpack('s*')[0]
738
- end
739
- else
740
- # Unknown what unpack code to use here:
741
- if elements > 1
742
- num=bin.unpack('s*')
743
- else
744
- num=bin.unpack('s*')[0]
745
- end
746
- @msg+=["Warning: Oppositve endian for signed short is not working yet!"]
747
- end
748
- return num
749
- end
750
-
751
-
752
- # Returns an unsigned long (4 bytes), from the supplied variable.
753
- def get_UL(bin)
754
- elements = bin.size/4
755
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
756
- if @endian
757
- # Unsigned native integer:
758
- if elements > 1
759
- num=bin.unpack('I*')
760
- else
761
- num=bin.unpack('I*')[0]
762
- end
466
+ # Changes encoding variables as the file reading proceeds past the initial 0002 group of the DICOM file.
467
+ def switch_syntax()
468
+ # The information read from the Transfer syntax tag (if present), needs to be processed:
469
+ process_transfer_syntax()
470
+ # We only plan to run this method once:
471
+ @switched = true
472
+ # Update endian, explicitness and unpack variables:
473
+ @file_endian = @rest_endian
474
+ @explicit = @rest_explicit
475
+ if @sys_endian == @file_endian
476
+ @endian = true
763
477
  else
764
- # Unsigned long in network byte order:
765
- if elements > 1
766
- num=bin.unpack('N*')
767
- else
768
- num=bin.unpack('N*')[0]
769
- end
478
+ @endian = false
770
479
  end
771
- return num
480
+ set_unpack_strings()
772
481
  end
773
482
 
774
483
 
775
- # Returns a signed long (4 bytes), from the supplied variable.
776
- def get_SL(bin)
777
- elements = bin.size/4
778
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
779
- if @endian
780
- # Signed native long integer:
781
- if elements > 1
782
- num=bin.unpack('l*')
783
- else
784
- num=bin.unpack('l*')[0]
785
- end
786
- else
787
- @msg+=["Warning: Oppositve endian for signed long is not working yet!"]
788
- if elements > 1
789
- num=bin.unpack('l*')
790
- else
791
- num=bin.unpack('l*')[0]
792
- end
793
- end
794
- return num
795
- end
484
+ # Checks the Transfer Syntax UID tag and updates class variables to prepare for correct reading of DICOM file.
485
+ # A lot of code here is duplicated in DWrite class. Should move as much of this code as possible to DLibrary I think.
486
+ def process_transfer_syntax()
487
+ ts_pos = @labels.index("0002,0010")
488
+ if ts_pos != nil
489
+ ts_value = @raw[ts_pos].unpack('a*').to_s.rstrip
490
+ valid = @lib.check_ts_validity(ts_value)
491
+ if not valid
492
+ @msg+=["Warning: Invalid/unknown transfer syntax! Will try reading the file, but errors may occur."]
493
+ end
494
+ case ts_value
495
+ # Some variations with uncompressed pixel data:
496
+ when "1.2.840.10008.1.2"
497
+ # Implicit VR, Little Endian
498
+ @rest_explicit = false
499
+ @rest_endian = false
500
+ when "1.2.840.10008.1.2.1"
501
+ # Explicit VR, Little Endian
502
+ @rest_explicit = true
503
+ @rest_endian = false
504
+ when "1.2.840.10008.1.2.1.99"
505
+ # Deflated Explicit VR, Little Endian
506
+ @msg += ["Warning: Transfer syntax 'Deflated Explicit VR, Little Endian' is untested. Unknown if this is handled correctly!"]
507
+ @rest_explicit = true
508
+ @rest_endian = false
509
+ when "1.2.840.10008.1.2.2"
510
+ # Explicit VR, Big Endian
511
+ @rest_explicit = true
512
+ @rest_endian = true
513
+ else
514
+ # For everything else, assume compressed pixel data, with Explicit VR, Little Endian:
515
+ @rest_explicit = true
516
+ @rest_endian = false
517
+ end # of case ts_value
518
+ end # of if ts_pos != nil
519
+ end # of method process_syntax
796
520
 
797
521
 
798
- # Returns a floating point double (8 bytes), from the supplied variable.
799
- def get_FD(bin)
800
- elements = bin.size/8
801
- # If bin contains several numbers, unpack and return in an array. If just one number, return the number:
522
+ # Sets the unpack format strings that will be used for numbers depending on endianness of file/system.
523
+ def set_unpack_strings
802
524
  if @endian
803
- # Double in little-endian byte order:
804
- if elements > 1
805
- num=bin.unpack('E*')
806
- else
807
- num=bin.unpack('E*')[0]
808
- end
525
+ # System endian equals file endian:
526
+ # Native byte order.
527
+ @by = "C*" # Byte (1 byte)
528
+ @us = "S*" # Unsigned short (2 bytes)
529
+ @ss = "s*" # Signed short (2 bytes)
530
+ @ul = "I*" # Unsigned long (4 bytes)
531
+ @sl = "l*" # Signed long (4 bytes)
532
+ @fs = "e*" # Floating point single (4 bytes)
533
+ @fd = "E*" # Floating point double ( 8 bytes)
809
534
  else
810
- # Double in network byte order:
811
- if elements > 1
812
- num=bin.unpack('G*')
813
- else
814
- num=bin.unpack('G*')[0]
815
- end
535
+ # System endian not equal to file endian:
536
+ # Network byte order.
537
+ @by = "C*"
538
+ @us = "n*"
539
+ @ss = "n*" # Not correct (gives US)
540
+ @ul = "N*"
541
+ @sl = "N*" # Not correct (gives UL)
542
+ @fs = "g*"
543
+ @fd = "G*"
816
544
  end
817
- return num
818
545
  end
819
546
 
820
547
 
821
- # Checks the Transfer Syntax UID tag and updates class variables to prepare for correct reading of DICOM file.
822
- def process_syntax(value)
823
- ts = value.rstrip
824
- valid = @lib.check_transfer_syntax(ts)
825
- if not valid
826
- @msg+=["Warning: Invalid/unknown transfer syntax! Will try reading the file, but errors may occur."]
827
- end
828
- case ts
829
- # Some variations with uncompressed pixel data:
830
- when "1.2.840.10008.1.2"
831
- # Implicit VR, Little Endian
832
- @rest_explicit = false
833
- @rest_endian = false
834
- @compression = false
835
- when "1.2.840.10008.1.2.1"
836
- # Explicit VR, Little Endian
837
- @rest_explicit = true
838
- @rest_endian = false
839
- @compression = false
840
- when "1.2.840.10008.1.2.1.99"
841
- # Deflated Explicit VR, Little Endian
842
- @msg+=["Warning: Transfer syntax 'Deflated Explicit VR, Little Endian' is untested. Unknown if this is handled correctly!"]
843
- @rest_explicit = true
844
- @rest_endian = false
845
- @compression = false
846
- when "1.2.840.10008.1.2.2"
847
- # Explicit VR, Big Endian
848
- @rest_explicit = true
849
- @rest_endian = true
850
- @compression = false
548
+ # Initiates the variables that are used during file reading.
549
+ def init_variables()
550
+ # Variables that hold data that will be available to the DObject class.
551
+ # Arrays that will hold information from the tags of the DICOM file:
552
+ @names = Array.new()
553
+ @labels = Array.new()
554
+ @types = Array.new()
555
+ @lengths = Array.new()
556
+ @values = Array.new()
557
+ @raw = Array.new()
558
+ @levels = Array.new()
559
+ # Array that will holde any messages generated while reading the DICOM file:
560
+ @msg = Array.new()
561
+ # Variables that contain properties of the DICOM file:
562
+ # Variable to keep track of whether the image pixel data in this file are compressed or not, and if it exists at all:
563
+ # Default explicitness of start of DICOM file::
564
+ @explicit = true
565
+ # Default endianness of start of DICOM files is little endian:
566
+ @file_endian = false
567
+ # Variable used to tell whether file was read succesfully or not:
568
+ @success = false
569
+
570
+ # Variables used internally when reading through the DICOM file:
571
+ # Array for keeping track of how many bytes have been read from the file up to and including each tag:
572
+ # (This is necessary for tracking the hiearchy in some DICOM files)
573
+ @integrated_lengths = Array.new()
574
+ @header_length = 0
575
+ # Array to keep track of the hierarchy of tags (this will be used to determine when a sequence or item is finished):
576
+ @hierarchy = Array.new()
577
+ @hierarchy_error = false
578
+ # Explicitness of the remaining groups after the initial 0002 group:
579
+ @rest_explicit = false
580
+ # Endianness of the remaining groups after the first group:
581
+ @rest_endian = false
582
+ # When the file switch from group 0002 to a later group we will update encoding values, and this switch will keep track of that:
583
+ @switched = false
584
+ # Use a "relationship endian" variable to guide reading of file:
585
+ if @sys_endian == @file_endian
586
+ @endian = true
851
587
  else
852
- # For everything else, assume compressed pixel data, with Explicit VR, Little Endian:
853
- @rest_explicit = true
854
- @rest_endian = false
855
- @compression = true
588
+ @endian = false
856
589
  end
590
+ # Set which format strings to use when unpacking numbers:
591
+ set_unpack_strings
592
+ # A length variable will be used at the end to check whether the last tag was read correctly, or whether the file endend unexpectedly:
593
+ @data_length = 0
594
+ # Keeping track of the tag level while reading through the file:
595
+ @current_level = 0
596
+ # This variable's string will be inserted as the length of items/sq that dont have a specified length:
597
+ @undef = "UNDEFINED"
598
+ # Items contained under the pixel data tag may contain data directly, so we need a variable to keep track of this:
599
+ @enc_image = false
857
600
  end
858
601
 
859
-
860
- # Checks the endianness of the system. Returns false if little endian, true if big endian.
861
- def check_sys_endian()
862
- x = 0xdeadbeef
863
- endian_type = {
864
- Array(x).pack("V*") => false, #:little
865
- Array(x).pack("N*") => true #:big
866
- }
867
- return endian_type[Array(x).pack("L*")]
868
- end
869
-
870
-
871
- end # End of class.
872
- end # End of module.
602
+ end # End of class
603
+ end # End of module