cabriolet 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +3 -0
  3. data/lib/cabriolet/binary/bitstream.rb +32 -21
  4. data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
  5. data/lib/cabriolet/cab/compressor.rb +85 -53
  6. data/lib/cabriolet/cab/decompressor.rb +2 -1
  7. data/lib/cabriolet/cab/extractor.rb +2 -35
  8. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  9. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  10. data/lib/cabriolet/checksum.rb +49 -0
  11. data/lib/cabriolet/collections/file_collection.rb +175 -0
  12. data/lib/cabriolet/compressors/quantum.rb +3 -51
  13. data/lib/cabriolet/decompressors/quantum.rb +81 -52
  14. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  15. data/lib/cabriolet/extraction/extractor.rb +171 -0
  16. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  17. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  18. data/lib/cabriolet/format_base.rb +79 -0
  19. data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
  20. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  21. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  22. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  23. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  24. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  25. data/lib/cabriolet/huffman/encoder.rb +15 -12
  26. data/lib/cabriolet/lit/compressor.rb +45 -689
  27. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  28. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  29. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  30. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  31. data/lib/cabriolet/lit/header_writer.rb +124 -0
  32. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  33. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  34. data/lib/cabriolet/quantum_shared.rb +105 -0
  35. data/lib/cabriolet/version.rb +1 -1
  36. data/lib/cabriolet.rb +114 -3
  37. metadata +38 -4
  38. data/lib/cabriolet/auto.rb +0 -173
  39. data/lib/cabriolet/parallel.rb +0 -333
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
module Cabriolet
  module LIT
    # Encodes LIT content data (NameList, manifest) into binary strings.
    #
    # Both builders accumulate into an ASCII-8BIT buffer and append raw bytes
    # only. Mixing packed integers (binary) with UTF-8 text in one growing
    # string raises Encoding::CompatibilityError as soon as both sides contain
    # non-ASCII bytes, so every text field is converted with String#b first.
    class ContentEncoder
      # Build NameList data from sections
      #
      # Layout written by this method (all integers 16-bit little-endian):
      # a zero field, the number of sections, then per section the name
      # length in UTF-16 code units, the name as UTF-16LE and a zero
      # terminator.
      #
      # @param sections [Array<Hash>] Sections array; each hash needs a
      #   :name String
      # @return [String] Binary (ASCII-8BIT) NameList data
      def self.build_namelist_data(sections)
        data = String.new(encoding: Encoding::BINARY)

        # Initial field followed by the number of sections
        data << [0, sections.size].pack("vv")

        null_terminator = [0].pack("v")
        sections.each do |section|
          # Raw UTF-16LE bytes of the section name
          name_utf16 = section[:name].encode("UTF-16LE").b

          # Length is counted in 16-bit code units, not bytes
          data << [name_utf16.bytesize / 2].pack("v")
          data << name_utf16
          data << null_terminator
        end

        data
      end

      # Build manifest data from manifest structure
      #
      # Writes a single zero byte (empty directory name) followed by four
      # groups. Each group is a 32-bit little-endian mapping count and, per
      # mapping, a 32-bit offset, three length-prefixed strings (internal
      # name, original name, content type) and a zero terminator byte.
      #
      # NOTE(review): each string length is stored in a single byte
      # (pack("C")), so strings longer than 255 bytes cannot be represented
      # faithfully; callers should keep names below that limit.
      #
      # @param manifest [Hash] Manifest structure; manifest[:mappings] is an
      #   Array of Hashes with :group, :offset, :internal_name,
      #   :original_name and :content_type
      # @return [String] Binary (ASCII-8BIT) manifest data
      def self.build_manifest_data(manifest)
        data = String.new(encoding: Encoding::BINARY)

        # For simplicity: single directory entry
        data << [0].pack("C") # Empty directory name = end of directories

        terminator = [0].pack("C")
        4.times do |group|
          group_mappings = manifest[:mappings].select { |m| m[:group] == group }

          data << [group_mappings.size].pack("V")

          group_mappings.each do |mapping|
            data << [mapping[:offset]].pack("V")

            # Internal name, original name, content type: each is a
            # one-byte length followed by the raw bytes of the string
            %i[internal_name original_name content_type].each do |field|
              raw = mapping[field].b
              data << [raw.bytesize].pack("C")
              data << raw
            end

            data << terminator
          end
        end

        data
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Cabriolet
  module LIT
    # Derives a MIME content type and a LIT file group from a filename's
    # extension. Matching is case-insensitive because the extension is
    # lower-cased before lookup.
    class ContentTypeDetector
      # MIME type reported for extensions that are not in the table
      FALLBACK_CONTENT_TYPE = "application/octet-stream"

      # Lower-case extension (including the dot) => MIME type
      CONTENT_TYPES = {
        ".html" => "text/html",
        ".htm" => "text/html",
        ".css" => "text/css",
        ".jpg" => "image/jpeg",
        ".jpeg" => "image/jpeg",
        ".png" => "image/png",
        ".gif" => "image/gif",
        ".txt" => "text/plain",
      }.freeze

      private_constant :FALLBACK_CONTENT_TYPE, :CONTENT_TYPES

      # Look up the MIME content type for a filename
      #
      # @param filename [String] Filename to analyze
      # @return [String] MIME content type; "application/octet-stream" when
      #   the extension is not recognised
      def self.content_type(filename)
        extension = ::File.extname(filename).downcase
        CONTENT_TYPES.fetch(extension, FALLBACK_CONTENT_TYPE)
      end

      # Pick the file group (0=HTML spine, 1=HTML other, 2=CSS, 3=Images)
      #
      # Every HTML file is reported as group 0; anything that is not HTML,
      # CSS or one of the listed image types lands in group 1.
      #
      # @param filename [String] Filename to analyze
      # @return [Integer] Group number
      def self.file_group(filename)
        case ::File.extname(filename).downcase
        when ".html", ".htm" then 0
        when ".css" then 2
        when ".jpg", ".jpeg", ".png", ".gif" then 3
        else 1
        end
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
module Cabriolet
  module LIT
    # Builds LIT directory structure with AOLL chunks.
    #
    # Entries are collected with #add_entry and serialised by
    # #build_aoll_chunk as a length-prefixed name followed by the section,
    # offset and size, each written as a variable-length integer.
    class DirectoryBuilder
      # Chunk size for directory entries (8KB)
      DEFAULT_CHUNK_SIZE = 0x2000

      attr_reader :chunk_size, :entries

      # Initialize directory builder
      #
      # @param chunk_size [Integer] Chunk size for directory entries
      def initialize(chunk_size: DEFAULT_CHUNK_SIZE)
        @chunk_size = chunk_size
        @entries = []
      end

      # Add an entry to the directory
      #
      # @param name [String] Entry name
      # @param section [Integer] Section number
      # @param offset [Integer] Offset within section
      # @param size [Integer] Size in bytes
      # @return [Array<Hash>] The entry list after appending
      def add_entry(name:, section:, offset:, size:)
        @entries << {
          name: name,
          section: section,
          offset: offset,
          size: size,
        }
      end

      # Build the directory structure
      #
      # @return [Hash] Directory structure with :entries (the builder's own
      #   entry array, not a copy), :chunk_size and :num_chunks
      def build
        {
          entries: @entries,
          chunk_size: @chunk_size,
          num_chunks: calculate_num_chunks,
        }
      end

      # Build AOLL (Archive Object List List) chunk
      #
      # All entries are emitted into a single chunk: the header's chunk
      # links are set to 0xFFFFFFFF and the current chunk number to 0.
      #
      # @return [String] Binary AOLL chunk data (header followed by entries)
      def build_aoll_chunk
        # Build all entry data first
        entries_data = @entries.map { |entry| encode_entry(entry) }.join

        # Quickref offset is set to the byte length of the entries data
        quickref_offset = entries_data.bytesize

        # Build AOLL header
        header = Binary::LITStructures::AOLLHeader.new
        header.tag = Binary::LITStructures::Tags::AOLL
        header.quickref_offset = quickref_offset
        header.current_chunk_low = 0
        header.current_chunk_high = 0
        header.prev_chunk_low = 0xFFFFFFFF
        header.prev_chunk_high = 0xFFFFFFFF
        header.next_chunk_low = 0xFFFFFFFF
        header.next_chunk_high = 0xFFFFFFFF
        header.entries_so_far = @entries.size
        header.reserved = 0
        header.chunk_distance = 0
        header.reserved2 = 0

        header.to_binary_s + entries_data
      end

      # Calculate total size needed for directory
      #
      # @return [Integer] Size in bytes: IFCM header plus AOLL chunk, but
      #   never less than the configured chunk size
      def calculate_size
        ifcm_size = Binary::LITStructures::IFCMHeader.new.to_binary_s.bytesize
        aoll_size = build_aoll_chunk.bytesize
        target_size = @chunk_size

        [ifcm_size + aoll_size, target_size].max
      end

      private

      # Calculate number of chunks needed
      #
      # Based on the upper-bound estimate from #estimate_entry_size, not on
      # the exact encoded size.
      #
      # @return [Integer] Number of chunks (at least 1)
      def calculate_num_chunks
        return 1 if @entries.empty?

        total_size = @entries.sum { |e| estimate_entry_size(e) }
        [1, (total_size / @chunk_size.to_f).ceil].max
      end

      # Estimate size of a directory entry
      #
      # @param entry [Hash] Directory entry
      # @return [Integer] Estimated size
      def estimate_entry_size(entry)
        name_size = entry[:name].bytesize
        # Name length (1-5 bytes) + name + section (1-5 bytes) + offset (1-5 bytes) + size (1-5 bytes)
        5 + name_size + 15
      end

      # Encode a directory entry with variable-length integers
      #
      # The buffer and the name are handled as raw bytes. Concatenating a
      # UTF-8 name containing non-ASCII characters with a multi-byte vint
      # (which contains bytes >= 0x80) would otherwise raise
      # Encoding::CompatibilityError.
      #
      # @param entry [Hash] Directory entry with :name, :section, :offset
      #   and :size
      # @return [String] Encoded entry data (ASCII-8BIT)
      def encode_entry(entry)
        name = entry[:name].b

        data = String.new(encoding: Encoding::BINARY)
        data << encode_vint(name.bytesize)
        data << name
        data << encode_vint(entry[:section])
        data << encode_vint(entry[:offset])
        data << encode_vint(entry[:size])
        data
      end

      # Write a variable-length integer (MSB = continuation bit)
      #
      # The value is split into 7-bit groups, most significant group first;
      # every byte except the last has bit 7 set.
      #
      # @param value [Integer] Non-negative value to encode
      # @return [String] Encoded integer (ASCII-8BIT)
      # @raise [ArgumentError] if value is negative (a negative Integer
      #   never shifts down to zero, so encoding it would never finish)
      def encode_vint(value)
        if value.negative?
          raise ArgumentError,
                "variable-length integer must be non-negative, got #{value}"
        end

        # Last byte: low 7 bits, continuation bit clear
        bytes = [value & 0x7F]
        value >>= 7

        # Remaining groups are prepended with the continuation bit set
        until value.zero?
          bytes.unshift((value & 0x7F) | 0x80)
          value >>= 7
        end

        bytes.pack("C*")
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Cabriolet
  module LIT
    # Produces GUID byte strings for LIT files
    class GuidGenerator
      class << self
        # Create a fresh random GUID
        #
        # SecureRandom is loaded on first use rather than at file load.
        #
        # @return [String] 16 random bytes
        def generate
          require "securerandom"

          guid_bytes = 16
          SecureRandom.random_bytes(guid_bytes)
        end
      end
    end
  end
end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
module Cabriolet
  module LIT
    # Writes LIT headers to output.
    #
    # Each method fills one Binary::LITStructures record, serialises it with
    # #to_binary_s and hands the bytes to the injected I/O system.
    #
    # Fields that come as a *_low / *_high pair are filled from one Integer
    # split into its low and high 32-bit words, so values of 2**32 and above
    # are encoded instead of being squeezed into the low word alone.
    class HeaderWriter
      # Mask selecting the low 32 bits of an Integer
      LOW_WORD_MASK = 0xFFFFFFFF
      private_constant :LOW_WORD_MASK

      # Initialize header writer
      #
      # @param io_system [System::IOSystem] I/O system for writing; must
      #   respond to write(handle, data)
      def initialize(io_system)
        @io_system = io_system
      end

      # Write primary header
      #
      # @param output_handle [System::FileHandle] Output file handle
      # @param structure [Hash] LIT structure; reads :version, :header_guid
      #   and [:secondary_header][:length]
      # @return [Integer] Bytes written (whatever the I/O system's write
      #   returns)
      def write_primary_header(output_handle, structure)
        header = Binary::LITStructures::PrimaryHeader.new
        header.signature = Binary::LITStructures::SIGNATURE
        header.version = structure[:version]
        header.header_length = 40
        header.num_pieces = 5
        header.secondary_header_length = structure[:secondary_header][:length]
        header.header_guid = structure[:header_guid]

        @io_system.write(output_handle, header.to_binary_s)
      end

      # Write piece structures
      #
      # @param output_handle [System::FileHandle] Output file handle
      # @param pieces [Array<Hash>] Pieces array; each hash needs Integer
      #   :offset and :size
      # @return [Integer] Total bytes written across all pieces
      def write_piece_structures(output_handle, pieces)
        total_bytes = 0

        pieces.each do |piece|
          piece_struct = Binary::LITStructures::PieceStructure.new
          piece_struct.offset_low, piece_struct.offset_high =
            split_u64(piece[:offset])
          piece_struct.size_low, piece_struct.size_high =
            split_u64(piece[:size])

          total_bytes += @io_system.write(output_handle,
                                          piece_struct.to_binary_s)
        end

        total_bytes
      end

      # Write secondary header block
      #
      # @param output_handle [System::FileHandle] Output file handle
      # @param sec_hdr [Hash] Secondary header metadata; reads
      #   :entry_chunklen, :entry_depth, :entry_entries, :count_chunklen,
      #   :count_entries, :entry_unknown, :count_unknown, :creator_id,
      #   :content_offset, :timestamp and :language_id
      # @return [Integer] Bytes written (whatever the I/O system's write
      #   returns)
      def write_secondary_header(output_handle, sec_hdr)
        header = Binary::LITStructures::SecondaryHeader.new

        # SECHDR block
        header.sechdr_version = 2
        header.sechdr_length = 152

        # Entry directory info
        header.entry_aoli_idx = 0
        header.entry_aoli_idx_high = 0
        header.entry_reserved1 = 0
        header.entry_last_aoll = 0
        header.entry_reserved2 = 0
        header.entry_chunklen = sec_hdr[:entry_chunklen]
        header.entry_two = 2
        header.entry_reserved3 = 0
        header.entry_depth = sec_hdr[:entry_depth]
        header.entry_reserved4 = 0
        header.entry_entries = sec_hdr[:entry_entries]
        header.entry_reserved5 = 0

        # Count directory info
        header.count_aoli_idx = 0xFFFFFFFF
        header.count_aoli_idx_high = 0xFFFFFFFF
        header.count_reserved1 = 0
        header.count_last_aoll = 0
        header.count_reserved2 = 0
        header.count_chunklen = sec_hdr[:count_chunklen]
        header.count_two = 2
        header.count_reserved3 = 0
        header.count_depth = 1
        header.count_reserved4 = 0
        header.count_entries = sec_hdr[:count_entries]
        header.count_reserved5 = 0

        header.entry_unknown = sec_hdr[:entry_unknown]
        header.count_unknown = sec_hdr[:count_unknown]

        # CAOL block (repeats the chunk lengths and unknown fields above)
        header.caol_tag = Binary::LITStructures::Tags::CAOL
        header.caol_version = 2
        header.caol_length = 80 # 48 + 32
        header.creator_id = sec_hdr[:creator_id]
        header.caol_reserved1 = 0
        header.caol_entry_chunklen = sec_hdr[:entry_chunklen]
        header.caol_count_chunklen = sec_hdr[:count_chunklen]
        header.caol_entry_unknown = sec_hdr[:entry_unknown]
        header.caol_count_unknown = sec_hdr[:count_unknown]
        header.caol_reserved2 = 0

        # ITSF block
        header.itsf_tag = Binary::LITStructures::Tags::ITSF
        header.itsf_version = 4
        header.itsf_length = 32
        header.itsf_unknown = 1
        header.content_offset_low, header.content_offset_high =
          split_u64(sec_hdr[:content_offset])
        header.timestamp = sec_hdr[:timestamp]
        header.language_id = sec_hdr[:language_id]

        @io_system.write(output_handle, header.to_binary_s)
      end

      private

      # Split an Integer into its low and high 32-bit words
      #
      # @param value [Integer] Value to split
      # @return [Array(Integer, Integer)] [low word, high word]
      def split_u64(value)
        [value & LOW_WORD_MASK, value >> 32]
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "directory_builder"
4
+
5
module Cabriolet
  module LIT
    # Builds piece data for LIT files
    class PieceBuilder
      # Build piece 0 data (file size information)
      #
      # Layout: four 32-bit little-endian words - the SIZE_PIECE tag, the
      # low word of the total uncompressed size, its high word, and a
      # reserved zero. The size is split explicitly because pack("V") keeps
      # only the low 32 bits of a larger Integer without raising.
      #
      # @param file_data [Array<Hash>] File data array; each hash needs an
      #   Integer :uncompressed_size
      # @return [String] Binary piece 0 data (16 bytes)
      def self.build_piece0(file_data)
        # Calculate total content size
        content_size = file_data.sum { |f| f[:uncompressed_size] }

        [
          Binary::LITStructures::Tags::SIZE_PIECE,
          content_size & 0xFFFFFFFF, # Low bits
          content_size >> 32,        # High bits
          0,                         # Reserved
        ].pack("V4")
      end

      # Build piece 1 data (directory IFCM structure)
      #
      # Emits the IFCM header followed by one AOLL chunk holding every
      # directory entry, then zero-pads the result up to 8192 bytes.
      #
      # NOTE(review): the padding target is the fixed 8192, not
      # directory[:chunk_size]; confirm this is intended when a
      # non-default chunk size is used.
      #
      # @param directory [Hash] Directory structure from DirectoryBuilder
      #   (:entries, :chunk_size, :num_chunks)
      # @return [String] Binary piece 1 data
      def self.build_piece1(directory)
        builder = DirectoryBuilder.new(chunk_size: directory[:chunk_size])

        # Build IFCM header
        ifcm = Binary::LITStructures::IFCMHeader.new
        ifcm.tag = Binary::LITStructures::Tags::IFCM
        ifcm.version = 1
        ifcm.chunk_size = directory[:chunk_size]
        ifcm.param = 0x100000
        ifcm.reserved1 = 0xFFFFFFFF
        ifcm.reserved2 = 0xFFFFFFFF
        ifcm.num_chunks = directory[:num_chunks]
        ifcm.reserved3 = 0

        data = ifcm.to_binary_s

        # Build AOLL chunk with directory entries
        directory[:entries].each do |entry|
          builder.add_entry(
            name: entry[:name],
            section: entry[:section],
            offset: entry[:offset],
            size: entry[:size],
          )
        end

        data += builder.build_aoll_chunk

        # Pad to fill piece (8KB standard); longer data is left untouched
        target_size = 8192
        if data.bytesize < target_size
          data += "\x00" * (target_size - data.bytesize)
        end

        data
      end

      # Build piece 2 data (index information)
      #
      # @return [String] Binary piece 2 data (512 zero bytes)
      def self.build_piece2
        # Minimal index data for foundation
        "\x00" * 512
      end
    end
  end
end