subconv 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,129 @@
1
+ require 'subconv/caption'
2
+
3
+ module Subconv
4
+ # Apply post-processing to captions
5
+ # Supported operations:
6
+ # - remove color nodes
7
+ # - remove flash nodes
8
+ # - convert XY viewport relative positions to simple top or bottom positions
9
+ # - merge multiple captions on screen at the same time into one caption
10
class CaptionFilter
  # Create a new filter.
  #
  # options is a hash. The keys read by this class are:
  # - :remove_color                  add ColorNode to the node types that are removed
  # - :remove_flash                  add FlashNode to the node types that are removed
  # - :filter_node_types             additional node classes to remove (optional)
  # - :xy_position_to_top_or_bottom  replace XY positions with :top or :bottom
  # - :merge_by_position             merge captions with equal timespan and position
  #
  # The hash and its :filter_node_types array are copied, so the objects
  # passed in by the caller are never modified.
  def initialize(options)
    @options = options.dup
    @options[:filter_node_types] = (@options[:filter_node_types] || []).dup
    @options[:filter_node_types].push ColorNode if @options[:remove_color]
    @options[:filter_node_types].push FlashNode if @options[:remove_flash]
  end

  # Apply all enabled operations to the captions array in place.
  # Captions that were merged into an earlier caption are removed from the array.
  # Returns the captions array.
  def process!(captions)
    last_timespan = nil
    open_captions = {}
    last_top_position = nil

    captions.map! do |caption|
      is_same_timespan = last_timespan == caption.timespan

      unless is_same_timespan
        # Time changed -> do not compare with previous top position
        last_top_position = nil
        open_captions = {}
      end

      if @options[:xy_position_to_top_or_bottom]
        y = caption.position.y
        # Lines in the upper half go to the top. Lines in the lower half stay
        # at the top as well when they continue directly from a caption
        # displayed at the top, to avoid continuous captions being torn in half.
        if y < 0.5 || (last_top_position && (y - last_top_position) < 0.08)
          last_top_position = y
          caption.position = :top
        else
          caption.position = :bottom
        end

        # x position is removed altogether and the caption is now center-aligned
        caption.align = :middle
      end

      # Captions are expected to be ordered by increasing timespan and y position (this is
      # guaranteed for the SCC reader)
      if @options[:merge_by_position] && is_same_timespan && open_captions.key?(caption.position)
        merge_into!(open_captions[caption.position], caption)
        # Remove this caption since it has been merged
        next
      end

      last_timespan = caption.timespan
      open_captions[caption.position] = caption
      caption
    end

    # Remove nils resulting from removed captions
    captions.compact!
    # Do per-caption processing after merging etc.
    captions.each { |caption| process_caption!(caption) }
  end

  # Remove the configured node types from one caption and merge its
  # adjacent text nodes. Returns the caption.
  def process_caption!(caption)
    filter_nodes! caption.content
    merge_text_nodes! caption.content
    caption
  end

  private

  # Append the content of source to target, separated by a line break
  def merge_into!(target, source)
    target.content.children.push TextNode.new("\n")
    target.content.children.concat source.content.children
  end

  # Merge continuous text nodes (recursively for nested containers)
  # Example: [TextNode("a"), TextNode("b")] -> [TextNode("ab")]
  # Returns the node that was passed in.
  def merge_text_nodes!(node)
    return node unless node.is_a? ContainerNode

    current_text_node = nil
    node.children.map! do |child|
      if child.is_a? TextNode
        if current_text_node
          # Add text to previous node and remove this one
          current_text_node.text << child.text
          next
        end
        current_text_node = child
      else
        # Any other node ends the current run of text
        current_text_node = nil
        merge_text_nodes! child
      end
      child
    end

    # Remove nils from removed text nodes
    node.children.compact!
    node
  end

  # Remove specified nodes in an array, i.e. replace them with their children
  # Only container nodes can be filtered
  # Returns the node that was passed in.
  def filter_nodes!(node)
    return node unless node.is_a? ContainerNode

    node.children.map! do |child|
      # Filter recursively
      filter_nodes! child
      # Replace child with its contents if its type is to be removed
      @options[:filter_node_types].include?(child.class) ? child.children : child
    end
    # Flatten away arrays that might have been introduced
    # by removing nodes
    node.children.flatten!

    node
  end
end
129
+ end
@@ -0,0 +1,470 @@
1
+ # frozen_string_literal: true
2
+ require 'subconv/utility'
3
+
4
+ require 'solid_struct'
5
+ require 'timecode'
6
+
7
+ module Subconv
8
+ module Scc
9
+ FILE_MAGIC = 'Scenarist_SCC V1.0'.freeze
10
+
11
+ # Grid size
12
+ GRID_ROWS = 15
13
+ GRID_COLUMNS = 32
14
+
15
# Grid is just an array of rows (each row being an array of cells) with some
# extra convenience functions and a default size of GRID_ROWS x GRID_COLUMNS
class Grid < Array
  def initialize
    super(GRID_ROWS) { Array.new(GRID_COLUMNS) }
  end

  # The grid is empty when there are no characters in it, i.e. all cells are nil
  def empty?
    all? { |row| row.all?(&:nil?) }
  end

  # Insert continuous text at a given position
  # Every inserted character references the same style object.
  # Raises IndexError if the text would not fit completely into the given
  # row; in that case the grid is left untouched. Without this check a text
  # that is too long would silently grow the row beyond the grid width.
  # Returns self for chaining
  def insert_text(row, column, text, style = CharacterStyle.default)
    fail IndexError, "Row #{row} is outside of the grid" unless row.between?(0, size - 1)

    unless text.empty? || (column >= 0 && column + text.length <= self[row].size)
      fail IndexError, "Text \"#{text}\" does not fit into row #{row} at column #{column}"
    end

    text.each_char.with_index(column) do |char, target_column|
      self[row][target_column] = Character.new(char, style)
    end
    self
  end
end
36
+
37
# Immutable value objects for the CEA-608 colors, with conversions to
# integer, string and symbol.
# Every available color is registered as a constant in this class
# (Color::WHITE, Color::RED, ...). All instances are frozen, and the only
# ways to obtain one are the constants and ::for_value.
class Color
  # Lookup tables, filled by ::register_color and frozen afterwards
  # rubocop:disable MutableConstant
  COLORS = {}
  TO_SYMBOL_MAP = {}
  # rubocop:enable MutableConstant

  class << self
    # Create the frozen instance for a color code and make it available
    # through the lookup tables and as an upper-case class constant
    def register_color(name, value)
      new_color = Color.new(value).freeze
      COLORS[value] = new_color
      TO_SYMBOL_MAP[value] = name
      const_set(name.to_s.upcase, new_color)
    end

    # Get the Color instance corresponding to a CEA608 color code
    def for_value(value)
      COLORS.fetch(value) { fail "Color value #{value} is unknown" }
    end
  end

  def initialize(color)
    @color = color
  end

  # CEA-608 colors; the position in the list is the color code
  %i(white green blue cyan red yellow magenta).each_with_index do |name, code|
    register_color name, code
  end

  # Prevent future modifications
  [COLORS, TO_SYMBOL_MAP].each(&:freeze)

  # CEA-608 color code
  def value
    @color
  end
  alias_method :to_i, :value

  # Lower-case CEA608 name of the color as symbol
  def to_symbol
    TO_SYMBOL_MAP[@color]
  end

  # Lower-case CEA-608 name of the color
  def to_s
    to_symbol.to_s
  end
  alias_method :inspect, :to_s

  # Disallow creating new instances
  private_class_method :new
end
101
+
102
# Style attributes (color, italics, underline, flash) of a single character
CharacterStyle = SolidStruct.new(:color, :italics, :underline, :flash)

# A fresh style object for plain text: white, no italics, no underline, no flash
def CharacterStyle.default
  CharacterStyle.new(color: Color::WHITE, italics: false, underline: false, flash: false)
end

# A single cell of the closed caption grid: the character and its style
Character = SolidStruct.new(:character, :style)

# A completely rendered caption grid together with the timecode it is displayed at
Caption = SolidStruct.new(:timecode, :grid)
115
+
116
+ # SCC reader
117
+ # Parse and render an SCC file sequentially into a background and foreground grid
118
+ # like a TV set would do and store the resulting closed captions as grid snapshots into an array
119
+ # whenever the foreground grid changes.
120
+ #
121
+ # Only captions in data channel 1 are read. Also, invalid byte parity will raise an error unless checking is disabled.
122
+ # The advanced recovery methods mentioned in CEA608 are not implemented since the source is assumed to contain no errors (e.g. DVD source).
123
+ class Reader
124
# Regular expression for parsing one line of data:
# a timecode, a tab and at least one four-digit hexadecimal word
LINE_REGEXP = /^(?<timecode>[0-9:;]+)\t(?<data>(?:[0-9a-fA-F]{4} ?)+)$/

# Map of standard characters that do not match the standard ASCII codes
# to their corresponding unicode characters.
# Looking up any other character simply returns that character.
STANDARD_CHARACTER_MAP = Hash.new { |_hash, key| key }.merge!(
  '*' => "\u00e1",
  '\\' => "\u00e9",
  '^' => "\u00ed",
  '_' => "\u00f3",
  '`' => "\u00fa",
  '{' => "\u00e7",
  '|' => "\u00f7",
  '}' => "\u00d1",
  '~' => "\u00f1",
  "\x7f" => "\u2588"
).freeze

# Map of special characters to unicode codepoints
# There is no entry for '9' (transparent space): it is not a real character
# and is handled separately.
SPECIAL_CHARACTER_MAP = {
  '0' => "\u00ae",
  '1' => "\u00b0",
  '2' => "\u00bd",
  '3' => "\u00bf",
  '4' => "\u2122",
  '5' => "\u00a2",
  '6' => "\u00a3",
  '7' => "\u266a",
  '8' => "\u00e0",
  ':' => "\u00e8",
  ';' => "\u00e2",
  '<' => "\u00ea",
  '=' => "\u00ee",
  '>' => "\u00f4",
  '?' => "\u00fb"
}.freeze

# Map of preamble address code high bytes to their
# corresponding base row numbers (counted from 0)
PREAMBLE_ADDRESS_CODE_ROW_MAP = {
  0x10 => 10,
  0x11 => 0,
  0x12 => 2,
  0x13 => 11,
  0x14 => 13,
  0x15 => 4,
  0x16 => 6,
  0x17 => 8
}.freeze
182
+
183
# Error classes

# Base class of the errors declared by the reader
class Error < RuntimeError
end

# The input does not have the expected SCC file structure
class InvalidFormatError < Error
end

# A data byte does not have the required odd parity
class ParityError < Error
end
187
+
188
# Internal state of the parser: the current drawing position (row and
# column, both kept inside the grid) and the current character style
class State
  class << self
    # State at the top left corner of the grid with the default character style
    def default
      State.new(row: 0, column: 0, style: CharacterStyle.default)
    end
  end

  attr_reader :row, :column
  attr_accessor :style

  # params is a hash with the keys :row, :column and :style
  def initialize(params)
    @style = params[:style]
    # Go through the writers so that the position is clamped
    self.row = params[:row]
    self.column = params[:column]
  end

  # Make sure the maximum row count is not exceeded
  def row=(row)
    last_row = GRID_ROWS - 1
    @row = Utility.clamp(row, 0, last_row)
  end

  # Make sure the cursor does not get outside the screen left or right
  def column=(column)
    last_column = GRID_COLUMNS - 1
    @column = Utility.clamp(column, 0, last_column)
  end
end
213
+
214
+ # Actual conversion result
215
+ attr_reader :captions
216
+
217
# Read an SCC file from the IO object io for a video with fps frames per second.
# The result is stored in the captions attribute; any result of a previous
# call is discarded.
# Byte parity is verified unless check_parity is false.
# Raises InvalidFormatError if the input is empty, does not start with the
# SCC file magic, contains a malformed line or has timecodes going backwards.
def read(io, fps, check_parity = true)
  # Initialize new grids for character storage
  @foreground_grid = Grid.new
  @background_grid = Grid.new
  # Initialize state
  @state = State.default
  @captions = []
  @last_grid = nil
  @now = Timecode.new(0, fps)
  @data_channel = 0

  # gets returns nil for empty input instead of raising EOFError, and chomp
  # (unlike chomp!) returns the line even if there was no line terminator to
  # remove, so a file consisting of nothing but the magic is accepted.
  magic = io.gets
  magic = magic.chomp unless magic.nil?
  fail InvalidFormatError, "File does not start with \"#{Scc::FILE_MAGIC}\"" unless Scc::FILE_MAGIC == magic

  io.each_line do |line|
    line.chomp!
    # Skip empty lines between the commands
    next if line.empty?

    line_data = LINE_REGEXP.match(line)
    fail InvalidFormatError, "Invalid line \"#{line}\"" if line_data.nil?

    # Parse timecode
    old_time = @now
    @now = Timecode.new(line_data[:timecode], fps)
    fail InvalidFormatError, 'New timecode is behind last time' if @now < old_time

    # Parse data words
    parse_data(line_data[:data], check_parity)
  end
end
248
+
249
+ private
250
+
251
# Parse the data part of one SCC line: a list of hexadecimal two-byte words
# separated by spaces. The current time is advanced by one frame per word,
# even if the word is skipped or raises an error.
# Raises ParityError if check_parity is set and a byte has even parity.
def parse_data(data, check_parity)
  previous_command = [0, 0]

  data.split(' ').each do |hex_word|
    begin
      # Decode hexadecimal word into two-byte string
      raw = [hex_word].pack('H*')
      if check_parity && !(correct_parity?(raw[0]) && correct_parity?(raw[1]))
        fail ParityError, "At least one byte in word #{hex_word} has even parity, odd required"
      end

      # Remove the parity bit (8th bit) for further processing
      word = raw.bytes.map { |byte| byte & 0x7f }
      hi, lo = word

      if hi.between?(0x20, 0x7f)
        # The word contains characters only
        if @data_channel.zero?
          word.each { |byte| handle_character(byte) }
          # Reset last command
          previous_command = [0, 0]
        else
          # Last command was on a different channel
          puts 'Skipping characters on channel 2'
        end
      elsif word == previous_command
        # Skip commands transmitted twice for redundancy
        # But don't skip the next time, too
        previous_command = [0, 0]
      else
        # Channel information is encoded in the 4th bit, read it out
        @data_channel = (hi >> 3) & 1
        if @data_channel.zero?
          dispatch_command(hi, lo)
          previous_command = word
        else
          # If channel 2 processing is needed, parse the file two times and
          # change the above condition as needed, then unset the channel bit
          # for further processing.
          puts 'Skipping command on channel 2'
        end
      end
    ensure
      # Advance one frame for each word read
      @now += 1
    end
  end
end

# Call the handler responsible for a command word on channel 1
def dispatch_command(hi, lo)
  if hi == 0x11 && lo.between?(0x30, 0x3f)
    # Special character
    handle_special_character(lo)
  elsif hi.between?(0x10, 0x17) && lo >= 0x40
    # Preamble address code
    handle_preamble_address_code(hi, lo)
  elsif [0x14, 0x17].include?(hi) && lo.between?(0x20, 0x2f)
    handle_control_code(hi, lo)
  elsif hi == 0x11 && lo.between?(0x20, 0x2f)
    handle_mid_row_code(hi, lo)
  elsif hi.zero? && lo.zero?
    # Ignore filler
  else
    puts "Ignoring unknown command #{hi}/#{lo}"
  end
end
326
+
327
# Store one unicode character with a copy of the current style in the
# background grid at the cursor position, then move the cursor to the
# next column
def insert_character(char)
  cell = Character.new(char, @state.style.dup)
  @background_grid[@state.row][@state.column] = cell
  @state.column += 1
end
333
+
334
# Convert a CEA608 standard character to its unicode representation and
# insert it into the grid at the current position.
# The filler character (0) is ignored.
def handle_character(byte)
  return if byte.zero?

  insert_character(STANDARD_CHARACTER_MAP[byte.chr])
end
342
+
343
# Insert the unicode representation of a special character into the grid
# at the current position. The transparent space (0x39) is not a real
# character: it deletes the current column instead.
def handle_special_character(byte)
  return insert_character(SPECIAL_CHARACTER_MAP[byte.chr]) unless byte == 0x39

  # Transparent space: Move cursor after deleting the current column to open up a hole
  @background_grid[@state.row][@state.column] = nil
  @state.column += 1
end
355
+
356
# Set drawing position and style according to the information in a preamble address code
#
# The high byte selects the base row. The low byte is laid out as follows:
# - bit 0: underline
# - bits 1 to 3: indent (in steps of 4 columns) or color code
# - bit 4: set if bits 1 to 3 are an indent, unset if they are a color
# - bit 5: set if 1 is to be added to the base row
#
# An indent code places the cursor at the given indent. A color code places
# it in the first column.
def handle_preamble_address_code(hi, lo)
  row = PREAMBLE_ADDRESS_CODE_ROW_MAP[hi]
  # Low byte bit 5 adds 1 to the row number if set
  row += 1 if (lo & (1 << 5)) != 0
  @state.row = row

  style = @state.style
  # Low byte bit 0 indicates whether underlining is to be enabled
  style.underline = ((lo & 1) == 1)
  # Low byte bit 4 indicates whether it is an indent or a formatting code
  is_indent = (((lo >> 4) & 1) == 1)
  # Low byte bits 1 to 3 are the color or indent code, depending on is_indent
  color_or_indent = (lo >> 1) & 0x7

  # Reset style
  style.flash = false
  style.italics = false

  if is_indent
    # Indent code always sets white as color attribute
    style.color = Color::WHITE
    # One indent equals 4 characters
    @state.column = color_or_indent * 4
  else
    # A preamble address code without indent starts at the first column;
    # keeping the column of the previous row would misplace the text
    @state.column = 0
    if color_or_indent == 7
      # "color" 7 is white with italics
      style.color = Color::WHITE
      style.italics = true
    else
      style.color = Color.for_value(color_or_indent)
    end
  end
end
386
+
387
# Process a miscellaneous control code given as high and low byte.
# Codes that are not handled are reported on standard output and ignored.
def handle_control_code(hi, lo)
  case [hi, lo]
  when [0x14, 0x20]
    # Resume caption loading
    # Nothing to do here, only pop-on style is supported anyway
  when [0x14, 0x21]
    # Backspace, ignored in the first column
    unless @state.column.zero?
      @state.column -= 1
      # Delete character at cursor after moving one character back
      @background_grid[@state.row][@state.column] = nil
    end
  when [0x14, 0x24]
    # Delete to end of row
    (@state.column...GRID_COLUMNS).each { |column| @background_grid[@state.row][column] = nil }
  when [0x14, 0x28]
    # Flash on
    # Flash is a spacing character
    insert_character(' ')
    @state.style.flash = true
  when [0x14, 0x2c]
    # Erase displayed memory
    @foreground_grid = Grid.new
    post_frame
  when [0x14, 0x2e]
    # Erase non-displayed memory
    @background_grid = Grid.new
  when [0x14, 0x2f]
    # End of caption (flip memories)
    @foreground_grid, @background_grid = @background_grid, @foreground_grid
    post_frame
  when [0x17, 0x21], [0x17, 0x22], [0x17, 0x23]
    # Tab offset
    # Bits 0 and 1 designate how many columns to go
    @state.column += (lo & 0x3)
  else
    # This includes resume text display (0x14/0x2b), which is not a pop-on command
    puts "Ignoring unknown control code #{hi}/#{lo}"
  end
end
431
+
432
# Process a mid-row code: insert a space and change the current style
# according to the low byte (bit 0: underline, bits 1 to 3: color code,
# with color code 7 meaning italics)
def handle_mid_row_code(_hi, lo)
  # Mid-row codes are spacing characters
  insert_character(' ')

  style = @state.style
  style.underline = lo.odd?

  color_code = (lo >> 1) & 0x7
  italics = (color_code == 0x7)
  # Italics keep the current color, color codes disable italics
  style.italics = italics
  style.color = Color.for_value(color_code) unless italics

  # All mid-row codes always disable flash
  style.flash = false
end
451
+
452
# Insert the currently displayed foreground grid as caption into the captions array
# Must be called whenever the foreground grid is changed as a result of a command
#
# The caption receives a snapshot of the grid rather than the grid itself.
# The foreground grid becomes the background grid at the next end of caption
# command and may then be written to again, which would otherwise change
# captions that have already been stored.
def post_frame
  # Only push a new caption if the grid has changed
  return unless @captions.empty? || @foreground_grid != @last_grid

  # Copying the rows is sufficient: cells are only ever replaced, never modified
  snapshot = Grid.new.replace(@foreground_grid.map(&:dup))
  # Save space by not saving the grid if it is completely empty
  @captions.push(Caption.new(timecode: @now, grid: snapshot.empty? ? nil : snapshot))
  @last_grid = snapshot
end
463
+
464
# Whether the number of set bits in a byte is odd, i.e. whether the byte
# has odd parity
def correct_parity?(byte)
  binary_digits = format('%b', byte.ord)
  binary_digits.count('1').odd?
end
468
+ end
469
+ end
470
+ end