subconv 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ require 'subconv/caption'
2
+
3
+ module Subconv
4
# Apply post-processing to captions
# Supported operations:
# - remove color nodes
# - remove flash nodes
# - convert XY viewport relative positions to simple top or bottom positions
# - merge multiple captions on screen at the same time into one caption
class CaptionFilter
  # Largest vertical gap (as a fraction of the viewport height) between a
  # caption shown at the top and a following caption of the same timespan for
  # the latter to still be treated as a continuation of the top caption
  TOP_CONTINUATION_THRESHOLD = 0.08

  # options is a Hash that may contain:
  # - :remove_color                  also filter ColorNode
  # - :remove_flash                  also filter FlashNode
  # - :filter_node_types             additional node classes to filter
  # - :xy_position_to_top_or_bottom  replace XY positions by :top / :bottom
  # - :merge_by_position             merge simultaneous captions that share a position
  #
  # The passed hash and the array stored under :filter_node_types are copied,
  # so constructing several filters from one options hash neither modifies the
  # caller's data nor accumulates duplicate node types.
  def initialize(options = {})
    @options = options.dup
    node_types = Array(@options[:filter_node_types]).dup
    node_types.push ColorNode if @options[:remove_color]
    node_types.push FlashNode if @options[:remove_flash]
    @options[:filter_node_types] = node_types.uniq
  end

  # Filter the captions array in place and return it
  #
  # Captions are expected to be ordered by increasing timespan and y position
  # (this is guaranteed for the SCC reader)
  def process!(captions)
    last_timespan = nil
    open_captions = {}
    last_top_position = nil

    captions.map! do |caption|
      same_timespan = last_timespan == caption.timespan

      unless same_timespan
        # Time changed -> do not compare with previous top position
        last_top_position = nil
        open_captions = {}
      end

      if @options[:xy_position_to_top_or_bottom]
        y = caption.position.y
        caption.position = top_or_bottom(y, last_top_position)
        # Remember where the top block currently ends so that directly
        # following lines can be attached to it
        last_top_position = y if caption.position == :top

        # x position is removed altogether and the caption is now center-aligned
        caption.align = :middle
      end

      if @options[:merge_by_position] && same_timespan && open_captions.key?(caption.position)
        target_children = open_captions[caption.position].content.children
        # The separator must be mutable: merge_text_nodes! appends to text in place
        target_children.push TextNode.new("\n".dup)
        target_children.concat caption.content.children
        # Remove this caption since it has been merged (map! stores nil for it)
        next
      end

      last_timespan = caption.timespan
      open_captions[caption.position] = caption
      caption
    end

    # Remove nils resulting from removed captions
    captions.compact!
    # Do per-caption processing after merging etc.
    captions.each { |caption| process_caption!(caption) }
  end

  # Apply the node filter to the content of one caption and merge adjacent
  # text nodes afterwards
  def process_caption!(caption)
    filter_nodes! caption.content
    merge_text_nodes! caption.content
  end

  private

  # Decide whether a caption at relative vertical position y is displayed at
  # the top or at the bottom. last_top_y is the y position of the last caption
  # of the same timespan that was placed at the top (nil if there is none).
  #
  # Lines are not moved to the bottom when they are continuing directly from a
  # caption displayed at the top to avoid continuous captions being torn in half.
  def top_or_bottom(y, last_top_y)
    continues_top = !last_top_y.nil? && (y - last_top_y) < TOP_CONTINUATION_THRESHOLD
    (y < 0.5 || continues_top) ? :top : :bottom
  end

  # Merge continuous text nodes
  # Example: [TextNode("a"), TextNode("b")] -> [TextNode("ab")]
  # Works recursively on container nodes and modifies the children in place.
  def merge_text_nodes!(node)
    return node unless node.is_a? ContainerNode

    current_text_node = nil
    node.children.reject! do |child|
      if !child.is_a?(TextNode)
        # A non-text node ends the current run of text
        current_text_node = nil
        merge_text_nodes! child
        false
      elsif current_text_node
        # Add text to the first node of the run and drop this node
        current_text_node.text << child.text
        true
      else
        current_text_node = child
        false
      end
    end

    node
  end

  # Remove specified nodes in an array, i.e. replace them with their children
  # Only container nodes can be filtered
  def filter_nodes!(node)
    return node unless node.is_a? ContainerNode

    remaining = node.children.flat_map do |child|
      # Filter recursively before deciding about the child itself
      filter_nodes! child
      @options[:filter_node_types].include?(child.class) ? child.children : [child]
    end
    node.children.replace(remaining)

    node
  end
end
129
+ end
@@ -0,0 +1,470 @@
1
+ # frozen_string_literal: true
2
+ require 'subconv/utility'
3
+
4
+ require 'solid_struct'
5
+ require 'timecode'
6
+
7
+ module Subconv
8
+ module Scc
9
# First line of every SCC file
FILE_MAGIC = 'Scenarist_SCC V1.0'.freeze

# Dimensions of the caption grid
GRID_ROWS = 15
GRID_COLUMNS = 32

# A caption screen: an array of GRID_ROWS rows, each an array of GRID_COLUMNS
# cells. A cell is nil until a character is stored in it.
class Grid < Array
  def initialize
    super(GRID_ROWS) { Array.new(GRID_COLUMNS) }
  end

  # True when no cell of the grid holds a character
  def empty?
    flatten.all?(&:nil?)
  end

  # Write text into one row, starting at the given column and moving one
  # column to the right per character. Every character gets the passed style.
  # Returns self for chaining
  def insert_text(row, column, text, style = CharacterStyle.default)
    text.each_char.with_index(column) do |char, target_column|
      self[row][target_column] = Character.new(char, style)
    end
    self
  end
end
36
+
37
# CEA-608 colors as immutable value objects.
# Every known color is available as a constant of this class (Color::WHITE,
# Color::RED, ...) or can be looked up by its numeric code with ::for_value.
# All instances are frozen and no further instances can be created from outside.
class Color
  # Lookup tables, filled by ::register_color and frozen afterwards
  # rubocop:disable MutableConstant
  COLORS = {}
  TO_SYMBOL_MAP = {}
  # rubocop:enable MutableConstant

  def initialize(color)
    @color = color
  end

  # CEA-608 color code
  def value
    @color
  end
  alias_method :to_i, :value

  # Lower-case CEA608 name of the color as symbol
  def to_symbol
    TO_SYMBOL_MAP[@color]
  end

  # Lower-case CEA-608 name of the color
  def to_s
    to_symbol.to_s
  end
  alias_method :inspect, :to_s

  class << self
    # Create the frozen instance for a color code and make it reachable through
    # both lookup tables and an upper-case class constant
    def register_color(name, value)
      new_color = Color.new(value).freeze
      COLORS[value] = new_color
      TO_SYMBOL_MAP[value] = name
      const_set(name.to_s.upcase, new_color)
    end

    # Get the Color instance corresponding to a CEA608 color code
    def for_value(value)
      COLORS.fetch(value) { raise "Color value #{value} is unknown" }
    end
  end

  # CEA-608 colors; the position in the list is the color code
  %i[white green blue cyan red yellow magenta].each_with_index do |name, code|
    register_color name, code
  end

  # Prevent future modifications
  COLORS.freeze
  TO_SYMBOL_MAP.freeze

  # Disallow creating new instances
  private_class_method :new
end
101
+
102
# Display attributes of a single character: color, italics, underline and flash
CharacterStyle = SolidStruct.new(:color, :italics, :underline, :flash)

# Build a fresh style with the default attributes: white, with italics,
# underline and flash all switched off
def CharacterStyle.default
  new(color: Color::WHITE, italics: false, underline: false, flash: false)
end

# One cell of the closed caption grid: a character together with its style
Character = SolidStruct.new(:character, :style)

# One fully rendered caption: the grid to display and the timecode from which
# it is displayed
Caption = SolidStruct.new(:timecode, :grid)
115
+
116
# SCC reader
# Parse and render an SCC file sequentially into a background and foreground grid
# like a TV set would do and store the resulting closed captions as grid snapshots into an array
# whenever the foreground grid changes.
#
# Only captions in data channel 1 are read. Also, invalid byte parity will raise an error unless checking is disabled.
# The advanced recovery methods mentioned in CEA608 are not implemented since the source is assumed to contain no errors (e.g. DVD source).
class Reader
  # Regular expression for parsing one line of data
  LINE_REGEXP = /^(?<timecode>[0-9:;]+)\t(?<data>(?:[0-9a-fA-F]{4} ?)+)$/

  # One data word: four hexadecimal digits encoding two bytes
  WORD_REGEXP = /[0-9a-fA-F]{4}/

  # rubocop:disable MutableConstant

  # Map of standard characters that do not match the standard ASCII codes
  # to their corresponding unicode characters
  STANDARD_CHARACTER_MAP = {
    '*'    => "\u00e1",
    '\\'   => "\u00e9",
    '^'    => "\u00ed",
    '_'    => "\u00f3",
    '`'    => "\u00fa",
    '{'    => "\u00e7",
    '|'    => "\u00f7",
    '}'    => "\u00d1",
    '~'    => "\u00f1",
    "\x7f" => "\u2588"
  }
  # rubocop:enable MutableConstant
  # Simply return the character if no exception matched
  STANDARD_CHARACTER_MAP.default_proc = proc do |_hash, key|
    key
  end
  STANDARD_CHARACTER_MAP.freeze

  # Map of special characters to unicode codepoints
  SPECIAL_CHARACTER_MAP = {
    '0' => "\u00ae",
    '1' => "\u00b0",
    '2' => "\u00bd",
    '3' => "\u00bf",
    '4' => "\u2122",
    '5' => "\u00a2",
    '6' => "\u00a3",
    '7' => "\u266a",
    '8' => "\u00e0",
    # "\x39" => transparent space is handled specially since it is not a real character
    ':' => "\u00e8",
    ';' => "\u00e2",
    '<' => "\u00ea",
    '=' => "\u00ee",
    '>' => "\u00f4",
    '?' => "\u00fb"
  }.freeze

  # Map of preamble address code high bytes to their
  # corresponding base row numbers (counted from 0)
  PREAMBLE_ADDRESS_CODE_ROW_MAP = {
    0x10 => 10,
    0x11 => 0,
    0x12 => 2,
    0x13 => 11,
    0x14 => 13,
    0x15 => 4,
    0x16 => 6,
    0x17 => 8
  }.freeze

  # Error classes
  class Error < RuntimeError; end
  class InvalidFormatError < Error; end
  class ParityError < Error; end

  # Internal state of the parser consisting of current drawing position and character style
  class State
    def initialize(params)
      # Go through the writers so that the initial position is clamped, too
      self.row    = params[:row]
      self.column = params[:column]
      @style      = params[:style]
    end

    attr_accessor :style
    attr_reader :row, :column

    # Make sure the maximum row count is not exceeded
    def row=(row)
      @row = Utility.clamp(row, 0, GRID_ROWS - 1)
    end

    # Make sure the cursor does not get outside the screen left or right
    def column=(column)
      @column = Utility.clamp(column, 0, GRID_COLUMNS - 1)
    end

    # State at the start of a file: top left corner with the default style
    def self.default
      State.new(row: 0, column: 0, style: CharacterStyle.default)
    end
  end

  # Actual conversion result
  attr_reader :captions

  # Read an SCC file from the IO object io for a video with the frame rate fps.
  # Every call starts from a clean state; the result is available in #captions.
  #
  # Raises InvalidFormatError if the file is empty, does not start with the
  # SCC magic, contains a line that cannot be parsed or has timecodes that go
  # backwards. Raises ParityError for bytes with even parity unless
  # check_parity is false.
  def read(io, fps, check_parity = true)
    # Initialize new grids for character storage
    @foreground_grid = Grid.new
    @background_grid = Grid.new
    @last_grid = nil
    # Initialize state
    @state = State.default
    @captions = []
    @now = Timecode.new(0, fps)
    @data_channel = 0

    # gets returns nil for an empty file, and the non-destructive chomp also
    # works when the magic line is not terminated by a newline
    # (chomp! returns nil when nothing was removed)
    magic = io.gets&.chomp
    raise InvalidFormatError, "File does not start with \"#{Scc::FILE_MAGIC}\"" unless magic == Scc::FILE_MAGIC

    io.each_line do |raw_line|
      line = raw_line.chomp
      # Skip empty lines between the commands
      next if line.empty?

      line_data = LINE_REGEXP.match(line)
      raise InvalidFormatError, "Invalid line \"#{line}\"" if line_data.nil?

      # Parse timecode
      old_time = @now
      @now = Timecode.new(line_data[:timecode], fps)
      raise InvalidFormatError, 'New timecode is behind last time' if @now < old_time

      # Parse data words
      parse_data(line_data[:data], check_parity)
    end
  end

  private

  # Parse one line of SCC data
  def parse_data(data, check_parity)
    last_command = [0, 0]

    # LINE_REGEXP makes the space between words optional, so the words are
    # extracted by pattern instead of splitting at spaces
    data.scan(WORD_REGEXP).each do |word_string|
      begin
        word = decode_word(word_string, check_parity)
        hi, lo = word

        # First check if the word contains characters only
        if hi.between?(0x20, 0x7f)
          # Skip characters if last command was on different channel
          if @data_channel != 0
            puts 'Skipping characters on channel 2'
            next
          end

          handle_character(hi)
          handle_character(lo)

          # Reset last command
          last_command = [0, 0]
        else
          if word == last_command
            # Skip commands transmitted twice for redundancy
            # But don't skip the next time, too
            last_command = [0, 0]
            next
          end

          # Channel information is encoded in the 4th bit, read it out
          @data_channel = (hi >> 3) & 1
          if @data_channel != 0
            puts 'Skipping command on channel 2'
            next
            # If channel 2 processing is needed, parse the file two times and
            # change the above condition as needed, then unset the channel bit
            # for further processing.
          end

          dispatch_command(hi, lo)
          last_command = word
        end
      ensure
        # Advance one frame for each word read (also for skipped words)
        @now += 1
      end
    end
  end

  # Decode a hexadecimal word into its two bytes with the parity bit removed.
  # Raises ParityError if checking is enabled and a byte has even parity.
  def decode_word(word_string, check_parity)
    raw_bytes = [word_string].pack('H*').bytes
    if check_parity && !raw_bytes.all? { |byte| correct_parity?(byte) }
      raise ParityError, "At least one byte in word #{word_string} has even parity, odd required"
    end

    # Unset 8th bit
    raw_bytes.map { |byte| byte & 0x7f }
  end

  # Call the handler responsible for a channel 1 command word
  def dispatch_command(hi, lo)
    if hi == 0x11 && lo.between?(0x30, 0x3f)
      # Special character
      handle_special_character(lo)
    elsif hi.between?(0x10, 0x17) && lo >= 0x40
      # Preamble address code
      handle_preamble_address_code(hi, lo)
    elsif (hi == 0x14 || hi == 0x17) && lo.between?(0x20, 0x2f)
      handle_control_code(hi, lo)
    elsif hi == 0x11 && lo.between?(0x20, 0x2f)
      handle_mid_row_code(hi, lo)
    elsif hi.zero? && lo.zero?
      # Ignore filler
    else
      puts "Ignoring unknown command #{hi}/#{lo}"
    end
  end

  # Insert one unicode character into the grid at the current position and with the
  # current style, then advance the cursor one column
  def insert_character(char)
    # The style is copied since the state's style object is modified in place later
    @background_grid[@state.row][@state.column] = Character.new(char, @state.style.dup)
    @state.column += 1
  end

  # Insert a CEA608 character into the grid at the current position, converting it to its unicode representation
  def handle_character(byte)
    # Ignore filler character
    return if byte == 0

    insert_character(STANDARD_CHARACTER_MAP[byte.chr])
  end

  # Insert a special character into the grid at the current position, or delete the current column
  # in case of a transparent space.
  def handle_special_character(byte)
    if byte == 0x39
      # Transparent space: Move cursor after deleting the current column to open up a hole
      @background_grid[@state.row][@state.column] = nil
      @state.column += 1
    else
      insert_character(SPECIAL_CHARACTER_MAP[byte.chr])
    end
  end

  # Set drawing position and style according to the information in a preamble address code
  def handle_preamble_address_code(hi, lo)
    @state.row = PREAMBLE_ADDRESS_CODE_ROW_MAP[hi]
    # Low byte bit 5 adds 1 to the row number if set
    @state.row += 1 if lo & (1 << 5) != 0

    # Low byte bit 0 indicates whether underlining is to be enabled
    @state.style.underline = ((lo & 1) == 1)
    # Low byte bit 4 indicates whether it is an indent or a formatting code
    is_indent = (((lo >> 4) & 1) == 1)
    # Low byte bits 1 to 3 are the color or indent code, depending on is_indent
    color_or_indent = (lo >> 1) & 0x7

    # Reset style
    @state.style.flash = false
    @state.style.italics = false

    if is_indent
      # Indent code always sets white as color attribute
      @state.style.color = Color::WHITE
      # One indent equals 4 characters
      @state.column = color_or_indent * 4
    elsif color_or_indent == 7
      # "color" 7 is white with italics
      @state.style.color = Color::WHITE
      @state.style.italics = true
    else
      @state.style.color = Color.for_value(color_or_indent)
    end
    # NOTE(review): formatting (non-indent) codes leave the cursor column
    # untouched here; other decoders appear to move the cursor to column 0 in
    # that case - confirm against CEA-608 before changing.
  end

  # Process a miscellaneous control code
  def handle_control_code(hi, lo)
    if hi == 0x14 && lo == 0x20
      # Resume caption loading
      # Nothing to do here, only pop-on style is supported anyway
    elsif hi == 0x14 && lo == 0x21
      # Backspace
      unless @state.column.zero? # Ignore in the first column
        @state.column -= 1
        # Delete character at cursor after moving one character back
        @background_grid[@state.row][@state.column] = nil
      end
    elsif hi == 0x14 && lo == 0x24
      # Delete to end of row
      (@state.column...GRID_COLUMNS).each do |column|
        @background_grid[@state.row][column] = nil
      end
    elsif hi == 0x14 && lo == 0x28
      # Flash on
      # Flash is a spacing character
      insert_character(' ')
      @state.style.flash = true
    elsif hi == 0x14 && lo == 0x2c
      # Erase displayed memory
      @foreground_grid = Grid.new
      post_frame
    elsif hi == 0x14 && lo == 0x2e
      # Erase non-displayed memory
      @background_grid = Grid.new
    elsif hi == 0x14 && lo == 0x2f
      # End of caption (flip memories)
      @foreground_grid, @background_grid = @background_grid, @foreground_grid
      post_frame
    elsif hi == 0x17 && lo.between?(0x21, 0x23)
      # Tab offset
      # Bits 0 and 1 designate how many columns to go
      @state.column += (lo & 0x3)
    else
      # This includes resume text display (0x14/0x2b), which is not a pop-on command
      puts "Ignoring unknown control code #{hi}/#{lo}"
    end
  end

  # Process a mid-row code
  def handle_mid_row_code(_hi, lo)
    # Mid-row codes are spacing characters
    insert_character(' ')
    # Low byte bit 0 indicates whether underlining is to be enabled
    @state.style.underline = ((lo & 1) == 1)
    # Low byte bits 1 to 3 are the color code
    color = (lo >> 1) & 0x7

    if color == 0x7
      # Code 7 switches italics on and keeps the current color
      @state.style.italics = true
    else
      # Color mid-row codes disable italics
      @state.style.italics = false
      @state.style.color = Color.for_value(color)
    end
    # All mid-row codes always disable flash
    @state.style.flash = false
  end

  # Insert the currently displayed foreground grid as caption into the captions array
  # Must be called whenever the foreground grid is changed as a result of a command
  def post_frame
    # Only push a new caption if the grid has changed
    return unless @captions.empty? || @foreground_grid != @last_grid

    # Store a copy of the rows instead of the live grid: after the next memory
    # flip the live grid becomes the background memory and is written to
    # again, which must not alter captions that were already emitted
    snapshot = @foreground_grid.dup.map!(&:dup)
    # Save space by not saving the grid if it is completely empty
    @captions.push(Caption.new(timecode: @now, grid: snapshot.empty? ? nil : snapshot))
    @last_grid = snapshot
  end

  # Check a byte for odd parity
  def correct_parity?(byte)
    byte.ord.to_s(2).count('1').odd?
  end
end
469
+ end
470
+ end