dicom 0.7 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
data/lib/dicom/DRead.rb DELETED
@@ -1,532 +0,0 @@
1
- # Copyright 2008-2010 Christoffer Lervag
2
-
3
- # Some notes about this DICOM file reading class:
4
- # In addition to reading files that are compliant with DICOM 3 Part 10, the philosophy of this library
5
- # is to have maximum compatibility, and as such it will read most 'DICOM' files that deviate from the standard.
6
- # While reading files, this class will also analyse the hierarchy of elements for those DICOM files that
7
- # feature sequences and items, enabling the user to take advantage of this information for advanced
8
- # querying of the DICOM object afterwards.
9
-
10
module DICOM
  # Reads the data elements of a DICOM file (or of a binary string, e.g. one
  # received over a network connection) into a set of parallel arrays.
  #
  # For every data element that is read, one entry is appended to each of
  # +names+, +tags+, +vr+, +lengths+, +values+, +bin+ and +levels+, so that
  # index +i+ in each array describes the same element. Messages produced
  # while reading are collected in +msg+, and +success+ tells whether the
  # read completed without a detected length mismatch.
  #
  # Decoding of the raw bytes is delegated to a Stream instance, and tag
  # lookups / transfer syntax interpretation to LIBRARY; both are defined
  # outside this class.
  class DRead

    # VRs which, in explicit encoding, are followed by two reserved bytes and
    # a 4-byte value length instead of a 2-byte value length.
    LONG_LENGTH_VRS = %w[OB OW OF SQ UN UT].freeze
    # VRs whose value is kept as a binary string only (no decoded value).
    UNDECODED_VRS = %w[OW OB OF UN].freeze
    # Item and delimitation tags; these carry no VR in explicit encoding.
    ITEM_RELATED_TAGS = ["FFFE,E000", "FFFE,E00D", "FFFE,E0DD"].freeze

    attr_reader :success, :names, :tags, :vr, :lengths, :values, :bin, :levels, :explicit, :file_endian, :msg

    # Creates the reader and immediately reads all data elements.
    #
    # string  - Path of the file to read, or (when options[:bin] is set) the
    #           binary string to parse.
    # options - Hash of options:
    #           :sys_endian - stored in @sys_endian (defaults to false).
    #           :bin        - when truthy, +string+ is parsed as a binary
    #                         string and no file header is looked for.
    #           :syntax     - transfer syntax string to use instead of the
    #                         one found in the (0002,0010) element.
    #
    # On return, +success+ is true unless the source could not be opened, was
    # too short to hold a header, or the last element's binary data did not
    # match its declared length. Details are appended to +msg+.
    def initialize(string=nil, options={})
      @sys_endian = options[:sys_endian] || false
      @bin_string = options[:bin]
      @transfer_syntax = options[:syntax]
      init_variables

      if @bin_string
        @str = string
      else
        open_file(string)
        # open_file leaves @file unset (and a message in @msg) on failure:
        return unless @file
        begin
          @str = @file.read
        ensure
          # Close the handle even if reading raised.
          @file.close
        end
      end

      @stream = Stream.new(@str, @file_endian, @explicit)

      # A (network) binary string carries no file header, so only check files:
      unless @bin_string
        header = check_header
        # nil means the content is too short to be a DICOM file:
        return if header.nil?
        # No 'DICM' marker: rewind and parse elements from the very start.
        @stream.skip(-132) if header == false
      end

      # Read data elements until process_data_element reports end of data:
      reading = true
      reading = process_data_element while reading != false

      @success = true
      # Verify that the last element's data was read in full. Elements that
      # store no binary (sequence and item headers) cannot be compared this
      # way, so they are skipped rather than raising on a nil binary.
      expected = @lengths.last
      actual = @bin.last
      if expected.to_i > 0 && actual && actual.length != expected
        @msg << "Error! The data content read from file does not match the length specified for the tag #{@tags.last}. It seems this is either an invalid or corrupt DICOM file. Returning."
        @success = false
      end
    end

    # Splits the encoded data (everything after the group 0002 elements) into
    # an array of binary strings, each at most +size+ bytes long. This is
    # typically used to transmit the content through a network connection.
    #
    # Segment boundaries are placed between data elements where possible; an
    # element that is longer than +size+ is cut into +size+-byte pieces.
    # Concatenating the returned segments yields the stream's string from the
    # first non-meta element to the end.
    #
    # size - Maximum segment length in bytes; must be positive.
    #
    # Returns an Array of Strings. Raises ArgumentError for a non-positive size.
    def extract_segments(size)
      raise ArgumentError, "Segment size must be a positive number (got #{size.inspect})." unless size > 0
      data = @stream.string
      pos = first_non_meta
      # @integrated_lengths[i] is the number of bytes consumed up to and
      # including element i, i.e. the offset of the first byte of element i+1.
      start = pos.zero? ? 0 : @integrated_lengths[pos - 1]
      segments = []
      (pos...@tags.length).each do |i|
        element_end = @integrated_lengths[i]
        next unless (element_end - start) > size
        previous_end = i.zero? ? 0 : @integrated_lengths[i - 1]
        if (element_end - previous_end) > size
          # This element alone exceeds the limit: cut everything from the
          # current start through the end of this element into fixed pieces.
          pieces = ((element_end - start).to_f / size.to_f).ceil
          pieces.times do
            segments << data[start, size]
            start += size
          end
        else
          # Close the segment just before this element and start the next
          # segment with this element's first byte.
          segments << data[start...previous_end]
          start = previous_end
        end
      end
      # Whatever is left fits within the limit and forms the final segment:
      segments << data[start..-1] if start < data.length
      segments
    end


    private


    # Checks for the 128 byte preamble followed by the string 'DICM'.
    #
    # Returns true when the marker is present, false when it is missing (a
    # warning is added to @msg and implicit encoding is assumed), or nil when
    # the content is shorter than 132 bytes. The stream is left positioned
    # after the 132 bytes in the first two cases.
    def check_header
      return nil if @str.length < 132
      @stream.skip(128)
      identifier = @stream.decode(4, "STR")
      if identifier == "DICM"
        # Only a real header occupies bytes in front of the data elements.
        # (When the marker is missing, the caller rewinds and parses from
        # byte 0, so the header length must stay at zero.)
        @header_length += 132
        return true
      end
      @msg << "Warning: The specified file does not contain the official DICOM header. Will try to read the file anyway, as some sources are known to skip this header."
      # Such a file may lack a transfer syntax element as well, so fall back
      # to implicit encoding:
      @explicit = false
      @stream.explicit = false
      false
    end

    # Reads one data element (tag, VR, length and value) from the stream and
    # appends its properties to the result arrays.
    #
    # Returns true when an element was read, false when no more tags exist.
    def process_data_element
      tag = read_tag
      return false if tag == false
      # Name and VR according to the library (the VR is replaced by the one
      # found in the data when the encoding is explicit):
      lib_data = LIBRARY.get_name_vr(tag)
      name = lib_data[0]
      vr = lib_data[1]
      tag_info = read_vr_length(vr, tag)
      vr = tag_info[0]
      # The VR used for hierarchy tracking is the one determined here, before
      # any pixel item override below:
      level_vr = vr
      length = tag_info[1]
      value = nil
      bin = nil
      # Items following an encapsulated pixel data element hold data directly:
      if @enc_image && tag == "FFFE,E000"
        vr = "OW"
        # The first item after the pixel data element keeps its library name:
        name = "Pixel Data Item" if @tags.last != "7FE0,0010"
      end
      if length.to_i > 0 && vr != "SQ" && vr != "()"
        value, bin = read_value(vr, length)
      elsif tag == "7FE0,0010"
        # A pixel data element without a value of its own is encapsulated:
        name = "Encapsulated Pixel Data"
        level_vr = "SQ"
        @enc_image = true
      end
      set_level(level_vr, length, tag, name)
      @names << name
      @tags << tag
      @vr << vr
      @lengths << length
      @values << value
      @bin << bin
      true
    end

    # Reads the element tag (4 bytes) and starts a new running byte total
    # for this element in @integrated_lengths.
    #
    # Returns the tag string, or false when the stream yields no tag.
    def read_tag
      tag = @stream.decode_tag
      return false unless tag
      # The first element starts right after the header; later elements
      # continue from the previous element's total:
      consumed = @integrated_lengths.empty? ? @header_length : @integrated_lengths.last
      @integrated_lengths << (consumed + 4)
      # Leaving group 0002 for the first time switches encoding settings:
      switch_syntax if tag[0..3] != "0002" && @switched == false
      tag
    end

    # Reads the VR (explicit encoding only) and the value length of the
    # current element, adding the bytes consumed to its running total.
    #
    # vr  - VR suggested by the library; used as-is for implicit encoding and
    #       for item related tags.
    # tag - The element's tag string.
    #
    # Returns [vr, length], where length is @undef for the undefined length
    # marker (0xFFFFFFFF). An odd length adds a warning to @msg.
    def read_vr_length(vr, tag)
      pre_skip = 0
      if @explicit == true
        # Item related elements have no VR field:
        unless ITEM_RELATED_TAGS.include?(tag)
          vr = @stream.decode(2, "STR")
          @integrated_lengths[-1] += 2
        end
        if LONG_LENGTH_VRS.include?(vr)
          # Two reserved bytes, then a 4-byte length:
          pre_skip = 2
          bytes = 4
        elsif vr == "()"
          bytes = 4
        else
          bytes = 2
        end
      else
        # Implicit encoding always uses a 4-byte length:
        bytes = 4
      end
      @stream.skip(pre_skip)
      length = @stream.decode(bytes, (bytes == 2 ? "US" : "UL"))
      @integrated_lengths[-1] += (pre_skip + bytes)
      if length == 4294967295
        # 'ff ff ff ff' signals an undefined length:
        length = @undef
      elsif length % 2 > 0
        @msg << "Warning: Odd number of bytes in data element's length occured. This is a violation of the DICOM standard, but program will attempt to read the rest of the file anyway."
      end
      [vr, length]
    end

    # Reads the value of the current element.
    #
    # vr     - The element's VR.
    # length - Number of value bytes to read.
    #
    # Returns [value, bin]: bin is the extracted binary string; value is the
    # decoded value (multiple values joined by a backslash), or nil for VRs
    # listed in UNDECODED_VRS.
    def read_value(vr, length)
      bin = @stream.extract(length)
      @integrated_lengths[-1] += length
      return [nil, bin] if UNDECODED_VRS.include?(vr)
      # Rewind and decode the same bytes:
      @stream.skip(-length)
      decoded = @stream.decode(length, vr)
      data = decoded.is_a?(Array) ? decoded.join("\\") : decoded
      [data, bin]
    end

    # Records the hierarchy level of the current element and works out the
    # level of the element that follows.
    #
    # vr     - VR used for level tracking ("SQ" opens a new level).
    # length - Declared length of the element (number or @undef).
    # tag    - The element's tag string.
    # name   - The element's name (an element named "Item" opens a new level).
    def set_level(vr, length, tag, name)
      @levels << @current_level
      if vr == "SQ" || name == "Item"
        @current_level += 1
        if length.to_i != 0
          # Defined length: remember it together with the byte total so far,
          # so the end of this level can be detected by counting bytes.
          @hierarchy << [length, @integrated_lengths.last]
        else
          # NOTE(review): this branch is taken for undefined AND for zero
          # lengths, and nothing in this class removes the entry again -
          # confirm that zero-length containers are handled as intended.
          @hierarchy << vr
        end
      end
      # Delimitation items close the current level:
      @current_level -= 1 if tag == "FFFE,E00D" || tag == "FFFE,E0DD"
      # Levels with a defined length end by byte count. Pixel data items and
      # sequence delimitation items are exempt from this check:
      if @hierarchy.size > 0
        check_level_end unless name == "Pixel Data Item" || tag == "FFFE,E0DD"
      end
    end

    # Closes every defined-length level that ends exactly at the current byte
    # total. Several levels may end at the same point, so the check repeats
    # for as long as the innermost open level has a defined length.
    def check_level_end
      while @hierarchy.last.is_a?(Array)
        described_length, start_length = @hierarchy.last
        consumed = @integrated_lengths.last - start_length
        if consumed == described_length
          @current_level -= 1
          @hierarchy.pop
        else
          # Overshooting the described length is reported once per read:
          if consumed > described_length && !@hierarchy_error
            @msg << "Unexpected hierarchy incident: Current length difference is greater than the expected value, which should not occur. This will not pose any problems unless you intend to query the object for elements based on hierarchy."
            @hierarchy_error = true
          end
          break
        end
      end
    end

    # Opens +file+ for binary reading and stores the handle in @file.
    # When the file cannot be used, @file is left untouched and an
    # explanation is appended to @msg.
    def open_file(file)
      unless File.exist?(file)
        @msg << "Error! The file you have supplied does not exist. Returning. (#{file})"
        return
      end
      unless File.readable?(file)
        @msg << "Error! File exists but I don't have permission to read it. Returning. (#{file})"
        return
      end
      if File.directory?(file)
        @msg << "Error! File is a directory. Returning. (#{file})"
        return
      end
      unless File.size(file) > 8
        @msg << "Error! File is too small to contain DICOM information. Returning. (#{file})"
        return
      end
      @file = File.new(file, "rb")
    end

    # Updates explicitness and endianness when reading moves past group 0002.
    # The transfer syntax is taken from the :syntax option if given, else
    # from the (0002,0010) element, else implicit little endian is assumed.
    def switch_syntax
      unless @transfer_syntax
        ts_pos = @tags.index("0002,0010")
        if ts_pos
          @transfer_syntax = @values[ts_pos].rstrip
        else
          @transfer_syntax = "1.2.840.10008.1.2" # Default is implicit, little endian
        end
      end
      # The library answers with [validity, explicitness, endianness]:
      result = LIBRARY.process_transfer_syntax(@transfer_syntax)
      unless result[0]
        @msg << "Warning: Invalid/unknown transfer syntax! Will try reading the file, but errors may occur."
      end
      @rest_explicit = result[1]
      @rest_endian = result[2]
      # This switch is only performed once per read:
      @switched = true
      @file_endian = @rest_endian
      @stream.set_endian(@rest_endian)
      @explicit = @rest_explicit
      @stream.explicit = @rest_explicit
    end

    # Returns the index of the first tag that is not in group 0002, or the
    # number of tags when every tag belongs to group 0002.
    def first_non_meta
      @tags.index { |tag| tag[0..3] != "0002" } || @tags.length
    end

    # Resets all state used while reading.
    def init_variables
      # Results, one entry per data element:
      @names = []
      @tags = []
      @vr = []
      @lengths = []
      @values = []
      @bin = []
      @levels = []
      # Messages generated while reading:
      @msg = []
      # Encoding assumed at the start of the data: explicit...
      @explicit = true
      # ...and little endian:
      @file_endian = false
      # Whether the read completed successfully:
      @success = false

      # Running byte totals, one per element (bytes consumed up to and
      # including that element); used for hierarchy tracking and segmenting:
      @integrated_lengths = []
      # Header size in bytes; zero until a header has been found:
      @header_length = 0
      # Open sequence/item levels, innermost last:
      @hierarchy = []
      @hierarchy_error = false
      # Encoding of the groups that follow group 0002:
      @rest_explicit = false
      @rest_endian = false
      # Set once the encoding switch after group 0002 has been performed:
      @switched = false
      @data_length = 0
      # Level of the element currently being read (top level is zero):
      @current_level = 0
      # Stored as the length of elements with an undefined length:
      @undef = "UNDEFINED"
      # Set when items under the pixel data element hold data directly:
      @enc_image = false
    end

  end # of class
end # of module