omnizip 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +99 -2
- data/docs/guides/archive-formats/index.adoc +31 -1
- data/docs/guides/archive-formats/ole-format.adoc +316 -0
- data/docs/guides/archive-formats/rpm-format.adoc +249 -0
- data/docs/index.adoc +12 -2
- data/lib/omnizip/algorithms/zstandard/constants.rb +9 -4
- data/lib/omnizip/algorithms/zstandard/fse/table.rb +13 -1
- data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
- data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
- data/lib/omnizip/formats/ole/constants.rb +61 -0
- data/lib/omnizip/formats/ole/dirent.rb +380 -0
- data/lib/omnizip/formats/ole/header.rb +198 -0
- data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
- data/lib/omnizip/formats/ole/storage.rb +305 -0
- data/lib/omnizip/formats/ole/types/variant.rb +328 -0
- data/lib/omnizip/formats/ole.rb +145 -0
- data/lib/omnizip/formats/rpm/constants.rb +58 -0
- data/lib/omnizip/formats/rpm/entry.rb +102 -0
- data/lib/omnizip/formats/rpm/header.rb +113 -0
- data/lib/omnizip/formats/rpm/lead.rb +122 -0
- data/lib/omnizip/formats/rpm/tag.rb +230 -0
- data/lib/omnizip/formats/rpm.rb +434 -0
- data/lib/omnizip/formats/xar/header.rb +22 -2
- data/lib/omnizip/formats/xar/reader.rb +4 -4
- data/lib/omnizip/formats/xar/toc.rb +10 -10
- data/lib/omnizip/formats/xar/writer.rb +3 -1
- data/lib/omnizip/version.rb +1 -1
- metadata +18 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6f138ecaafda0455b4a5d3abce1ce426dcbfc2fd851bcdf0e0b0892a9b9b4287
|
|
4
|
+
data.tar.gz: 1f94b97bd5e7b102abb907170d393edf4b192059c6aa5862d81b5b48ee354466
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 946e0c8adee99dde0acbf309a1ca30765459f15d543775f1964ca603eee24bd1628582797aa7567ed367c46d0f6adf03f56541071e83dfe0ca91f476289f0166
|
|
7
|
+
data.tar.gz: 3e9760f59d981bd97976099dd46eae135ad569a1259d37205a51b4e8a081379ff3f53c08d262a680a131619ca16dd8cdeb27aec89e381e1347d4c2436ac24a62
|
data/README.adoc
CHANGED
|
@@ -367,6 +367,100 @@ All libarchive XAR test cases pass, including:
|
|
|
367
367
|
* ✅ Extended attributes
|
|
368
368
|
* ✅ Various checksum algorithms
|
|
369
369
|
|
|
370
|
+
==== RPM Format Support (v0.4.0)
|
|
371
|
+
|
|
372
|
+
Omnizip provides complete RPM package format support for reading package metadata and extracting file contents.
|
|
373
|
+
|
|
374
|
+
**Status**: ✅ **Full Support** - Complete RPM reading and extraction
|
|
375
|
+
|
|
376
|
+
**What Works**:
|
|
377
|
+
|
|
378
|
+
* ✅ RPM lead parsing (magic, version, name, architecture)
|
|
379
|
+
* ✅ Header parsing with tag extraction (NAME, VERSION, RELEASE, etc.)
|
|
380
|
+
* ✅ File list extraction (basenames, directories, permissions)
|
|
381
|
+
* ✅ Dependency information (requires, provides, conflicts)
|
|
382
|
+
* ✅ Payload extraction with multiple compression formats
|
|
383
|
+
* ✅ gzip, bzip2, xz, zstd decompression support
|
|
384
|
+
|
|
385
|
+
**Usage**:
|
|
386
|
+
|
|
387
|
+
[source,ruby]
|
|
388
|
+
----
|
|
389
|
+
require 'omnizip'
|
|
390
|
+
|
|
391
|
+
# Read RPM package metadata
|
|
392
|
+
Omnizip::Formats::Rpm.open('package.rpm') do |rpm|
|
|
393
|
+
puts "Name: #{rpm.name}"
|
|
394
|
+
puts "Version: #{rpm.version}"
|
|
395
|
+
puts "Release: #{rpm.release}"
|
|
396
|
+
puts "Architecture: #{rpm.architecture}"
|
|
397
|
+
puts "Files: #{rpm.files.count}"
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
# Extract RPM contents
|
|
401
|
+
Omnizip::Formats::Rpm.extract('package.rpm', 'output/')
|
|
402
|
+
|
|
403
|
+
# List files in RPM
|
|
404
|
+
files = Omnizip::Formats::Rpm.list('package.rpm')
|
|
405
|
+
files.each { |f| puts f }
|
|
406
|
+
|
|
407
|
+
# Get package information
|
|
408
|
+
info = Omnizip::Formats::Rpm.info('package.rpm')
|
|
409
|
+
puts "#{info[:name]}-#{info[:version]}-#{info[:release]}"
|
|
410
|
+
----
|
|
411
|
+
|
|
412
|
+
**Architecture**:
|
|
413
|
+
|
|
414
|
+
* `Formats::Rpm::Reader` - Public API for reading RPM packages
|
|
415
|
+
* `Formats::Rpm::Lead` - 96-byte lead parser
|
|
416
|
+
* `Formats::Rpm::Header` - Header structure with tag extraction
|
|
417
|
+
* `Formats::Rpm::Entry` - File entry model
|
|
418
|
+
|
|
419
|
+
==== OLE Format Support (v0.4.0)
|
|
420
|
+
|
|
421
|
+
Omnizip provides complete OLE (Object Linking and Embedding) compound document format support for reading Microsoft compound files.
|
|
422
|
+
|
|
423
|
+
**Status**: ✅ **Full Support** - Complete OLE reading
|
|
424
|
+
|
|
425
|
+
**What Works**:
|
|
426
|
+
|
|
427
|
+
* ✅ OLE compound document header parsing
|
|
428
|
+
* ✅ Block allocation tables (BAT, SBAT, XBAT)
|
|
429
|
+
* ✅ Directory entry navigation
|
|
430
|
+
* ✅ File stream extraction
|
|
431
|
+
* ✅ Support for .doc, .xls, .ppt, .msi files
|
|
432
|
+
* ✅ Property set storage
|
|
433
|
+
|
|
434
|
+
**Usage**:
|
|
435
|
+
|
|
436
|
+
[source,ruby]
|
|
437
|
+
----
|
|
438
|
+
require 'omnizip'
|
|
439
|
+
|
|
440
|
+
# Open OLE compound document
|
|
441
|
+
Omnizip::Formats::Ole.open('document.doc') do |ole|
|
|
442
|
+
# List all streams in the document
|
|
443
|
+
ole.each_entry do |entry|
|
|
444
|
+
puts "#{entry.name} (#{entry.size} bytes)"
|
|
445
|
+
end
|
|
446
|
+
|
|
447
|
+
# Read a specific stream
|
|
448
|
+
data = ole.read_stream('WordDocument')
|
|
449
|
+
end
|
|
450
|
+
|
|
451
|
+
# Extract all streams
|
|
452
|
+
Omnizip::Formats::Ole.extract('document.doc', 'output/')
|
|
453
|
+
----
|
|
454
|
+
|
|
455
|
+
**Architecture**:
|
|
456
|
+
|
|
457
|
+
* `Formats::Ole::Storage` - Core storage implementation
|
|
458
|
+
* `Formats::Ole::Header` - 512-byte header parser
|
|
459
|
+
* `Formats::Ole::AllocationTable` - BAT/SBAT management
|
|
460
|
+
* `Formats::Ole::Dirent` - 128-byte directory entry
|
|
461
|
+
* `Formats::Ole::RangesIO` - Range-based IO wrapper
|
|
462
|
+
* `Formats::Ole::Types` - Type serialization (Variant, Lpstr, FileTime, etc.)
|
|
463
|
+
|
|
370
464
|
=== Preprocessing Filters
|
|
371
465
|
|
|
372
466
|
* **BCJ Filters** - Branch-Call-Jump filters for executables (x86, ARM, ARM64, PPC, SPARC, IA-64)
|
|
@@ -383,8 +477,10 @@ See link:readme-docs/preprocessing-filters.adoc[Preprocessing Filters Guide] for
|
|
|
383
477
|
* **RAR5** - Full read/write support with STORE and LZMA compression, multi-volume, solid archives (v0.3.0)
|
|
384
478
|
* **TAR** - Full read/write with POSIX extensions
|
|
385
479
|
* **ISO 9660** - Full read/write with Rock Ridge/Joliet
|
|
386
|
-
* **CPIO** - Full read/write (newc, CRC formats)
|
|
480
|
+
* **CPIO** - Full read/write (newc, CRC formats) with RPM payload support (v0.4.0)
|
|
481
|
+
* **RPM** - Full read support with metadata extraction, gzip/bzip2/xz/zstd payload decompression (v0.4.0)
|
|
387
482
|
* **XAR** - Full read/write with XML TOC, gzip/bzip2/lzma compression (v0.4.0)
|
|
483
|
+
* **OLE** - Full read support for Microsoft compound documents (.doc, .xls, .ppt, .msi) (v0.4.0)
|
|
388
484
|
* **GZIP/XZ/BZIP2** - Single file compression formats
|
|
389
485
|
|
|
390
486
|
See link:readme-docs/archive-formats.adoc[Archive Formats Documentation] for complete details.
|
|
@@ -420,11 +516,12 @@ See link:readme-docs/advanced-features.adoc[Advanced Features Guide] for details
|
|
|
420
516
|
|
|
421
517
|
Omnizip maintains comprehensive test coverage:
|
|
422
518
|
|
|
423
|
-
* **Total Tests**:
|
|
519
|
+
* **Total Tests**: 3540+ examples
|
|
424
520
|
* **Pass Rate**: 100% (0 failures, 0 pending)
|
|
425
521
|
* **Coverage**: All compression algorithms, archive formats, and features
|
|
426
522
|
* **Integration**: Full round-trip verification for all formats
|
|
427
523
|
* **Reference Tests**: libarchive RAR4/RAR5 compatibility verified (103 test files)
|
|
524
|
+
* **New Formats**: RPM (21 tests), CPIO (25 tests), OLE (36 tests), XAR (17 tests)
|
|
428
525
|
|
|
429
526
|
== Quick Start
|
|
430
527
|
|
|
@@ -7,7 +7,7 @@ has_children: true
|
|
|
7
7
|
|
|
8
8
|
== Archive Formats Overview
|
|
9
9
|
|
|
10
|
-
Omnizip supports
|
|
10
|
+
Omnizip supports 12 archive formats for different use cases. Each format has unique features, compression options, and compatibility characteristics.
|
|
11
11
|
|
|
12
12
|
== Format Comparison
|
|
13
13
|
|
|
@@ -44,6 +44,16 @@ Omnizip supports 10 archive formats for different use cases. Each format has uni
|
|
|
44
44
|
|LZMA2
|
|
45
45
|
|None
|
|
46
46
|
|System package compression
|
|
47
|
+
|
|
48
|
+
|link:rpm-format.html[RPM]
|
|
49
|
+
|gzip, bzip2, xz, zstd
|
|
50
|
+
|None (GPG signatures only)
|
|
51
|
+
|Linux package management
|
|
52
|
+
|
|
53
|
+
|link:ole-format.html[OLE]
|
|
54
|
+
|None (internal structure)
|
|
55
|
+
|Document-specific
|
|
56
|
+
|Windows compound files, MSI
|
|
47
57
|
|===
|
|
48
58
|
|
|
49
59
|
== Choosing a Format
|
|
@@ -71,6 +81,12 @@ Omnizip supports 10 archive formats for different use cases. Each format has uni
|
|
|
71
81
|
|
|
72
82
|
|Software packages
|
|
73
83
|
|7z or TAR.XZ
|
|
84
|
+
|
|
85
|
+
|Linux package management
|
|
86
|
+
|RPM
|
|
87
|
+
|
|
88
|
+
|Windows compound documents
|
|
89
|
+
|OLE (MSI, DOC, XLS, PPT)
|
|
74
90
|
|===
|
|
75
91
|
|
|
76
92
|
=== By Compatibility
|
|
@@ -123,6 +139,20 @@ Omnizip supports 10 archive formats for different use cases. Each format has uni
|
|
|
123
139
|
* **POSIX**: Full support
|
|
124
140
|
* **Unix Standard**: Yes
|
|
125
141
|
|
|
142
|
+
=== RPM Format
|
|
143
|
+
|
|
144
|
+
* **Compression**: gzip, bzip2, xz, zstd (payload)
|
|
145
|
+
* **Encryption**: None (packages are authenticated with GPG signatures, not encrypted)
|
|
146
|
+
* **Payload**: CPIO archive
|
|
147
|
+
* **Metadata**: Rich package information
|
|
148
|
+
|
|
149
|
+
=== OLE Format
|
|
150
|
+
|
|
151
|
+
* **Structure**: FAT-like file system
|
|
152
|
+
* **Streams**: Multiple streams and sub-storages
|
|
153
|
+
* **Types**: MSI, DOC, XLS, PPT, MSG
|
|
154
|
+
* **Metadata**: Summary information streams
|
|
155
|
+
|
|
126
156
|
== See Also
|
|
127
157
|
|
|
128
158
|
* link:../compression-algorithms/[Compression Algorithms] - Choose the right compression
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: OLE Format
|
|
3
|
+
nav_order: 10
|
|
4
|
+
parent: Archive Formats
|
|
5
|
+
grand_parent: Guides
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
[[ole-format]]
|
|
9
|
+
== Purpose
|
|
10
|
+
|
|
11
|
+
OLE (Object Linking and Embedding) Compound Document format is used by Microsoft for storing structured data in a single file. Common OLE files include:
|
|
12
|
+
- MSI (Windows Installer packages)
|
|
13
|
+
- DOC (legacy Word documents)
|
|
14
|
+
- XLS (legacy Excel spreadsheets)
|
|
15
|
+
- PPT (legacy PowerPoint presentations)
|
|
16
|
+
- THM (Microsoft Theme files)
|
|
17
|
+
- MSG (Outlook messages)
|
|
18
|
+
|
|
19
|
+
OLE uses a FAT-like file system within a single file, supporting up to 4GB of structured data.
|
|
20
|
+
|
|
21
|
+
== Key Characteristics
|
|
22
|
+
|
|
23
|
+
[cols="1,3"]
|
|
24
|
+
|===
|
|
25
|
+
|Property |Value
|
|
26
|
+
|
|
27
|
+
|Max Size
|
|
28
|
+
|2GB (standard), 4GB (with 4KB sectors)
|
|
29
|
+
|
|
30
|
+
|Sector Sizes
|
|
31
|
+
|512 bytes (standard), 4KB (large)
|
|
32
|
+
|
|
33
|
+
|Streams
|
|
34
|
+
|Multiple streams and sub-storages
|
|
35
|
+
|
|
36
|
+
|Encryption
|
|
37
|
+
|Optional (document-specific)
|
|
38
|
+
|
|
39
|
+
|Best For
|
|
40
|
+
|Windows compound files, legacy Office documents, MSI installers
|
|
41
|
+
|===
|
|
42
|
+
|
|
43
|
+
== Basic Usage
|
|
44
|
+
|
|
45
|
+
=== Open an OLE File
|
|
46
|
+
|
|
47
|
+
[source,ruby]
|
|
48
|
+
----
|
|
49
|
+
# Open OLE compound document
|
|
50
|
+
ole = Omnizip::Ole.open('document.doc')
|
|
51
|
+
|
|
52
|
+
# Access root storage
|
|
53
|
+
root = ole.root
|
|
54
|
+
puts "Root storage: #{root.name}"
|
|
55
|
+
|
|
56
|
+
# List streams and storages
|
|
57
|
+
root.each do |entry|
|
|
58
|
+
puts "#{entry.name} (#{entry.size} bytes)" if entry.file?
|
|
59
|
+
puts "#{entry.name}/" if entry.directory?
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
ole.close
|
|
63
|
+
----
|
|
64
|
+
|
|
65
|
+
=== Read Stream Data
|
|
66
|
+
|
|
67
|
+
[source,ruby]
|
|
68
|
+
----
|
|
69
|
+
# Read a specific stream
|
|
70
|
+
ole = Omnizip::Ole.open('document.doc')
|
|
71
|
+
|
|
72
|
+
# Access \x05SummaryInformation stream (document metadata)
|
|
73
|
+
summary = ole.open_stream("\x05SummaryInformation")
|
|
74
|
+
if summary
|
|
75
|
+
data = summary.read
|
|
76
|
+
puts "Summary size: #{data.size} bytes"
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Access WordDocument stream (main content)
|
|
80
|
+
word_stream = ole.open_stream("WordDocument")
|
|
81
|
+
if word_stream
|
|
82
|
+
content = word_stream.read
|
|
83
|
+
puts "Document content size: #{content.size} bytes"
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
ole.close
|
|
87
|
+
----
|
|
88
|
+
|
|
89
|
+
=== Extract All Files
|
|
90
|
+
|
|
91
|
+
[source,ruby]
|
|
92
|
+
----
|
|
93
|
+
# Extract all streams from OLE to files
|
|
94
|
+
Omnizip::Ole.open('document.doc') do |ole|
|
|
95
|
+
ole.extract_to('/output/directory/')
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Extract specific stream
|
|
99
|
+
Omnizip::Ole.open('spreadsheet.xls') do |ole|
|
|
100
|
+
workbook = ole.open_stream('Workbook')
|
|
101
|
+
File.binwrite('workbook.bin', workbook.read)
|
|
102
|
+
end
|
|
103
|
+
----
|
|
104
|
+
|
|
105
|
+
== MSI Package Inspection
|
|
106
|
+
|
|
107
|
+
MSI files are OLE compound documents containing Windows Installer data:
|
|
108
|
+
|
|
109
|
+
[source,ruby]
|
|
110
|
+
----
|
|
111
|
+
# Open MSI package
|
|
112
|
+
msi = Omnizip::Ole.open('installer.msi')
|
|
113
|
+
|
|
114
|
+
# List MSI tables (storages)
|
|
115
|
+
tables = msi.root.select { |e| e.directory? }
|
|
116
|
+
puts "MSI Tables:"
|
|
117
|
+
tables.each { |t| puts " #{t.name}" }
|
|
118
|
+
|
|
119
|
+
# Access \x05SummaryInformation
|
|
120
|
+
summary = msi.open_stream("\x05SummaryInformation")
|
|
121
|
+
if summary
|
|
122
|
+
puts "Summary information found"
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Access _Tables stream (list of all tables)
|
|
126
|
+
tables_stream = msi.open_stream("_Tables")
|
|
127
|
+
if tables_stream
|
|
128
|
+
puts "Tables stream size: #{tables_stream.size}"
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
msi.close
|
|
132
|
+
----
|
|
133
|
+
|
|
134
|
+
== Storage Structure
|
|
135
|
+
|
|
136
|
+
OLE files have a hierarchical structure like a file system:
|
|
137
|
+
|
|
138
|
+
[source]
|
|
139
|
+
----
|
|
140
|
+
Root Storage/
|
|
141
|
+
├── \x05SummaryInformation (document metadata)
|
|
142
|
+
├── \x05DocumentSummaryInformation
|
|
143
|
+
├── WordDocument (main content)
|
|
144
|
+
├── 1Table (data tables)
|
|
145
|
+
├── Data/
|
|
146
|
+
│ ├── Mso1Table
|
|
147
|
+
│ └── Mso0Table
|
|
148
|
+
└── ObjectPool/
|
|
149
|
+
└── _1000000000/
|
|
150
|
+
└── Ole10Native
|
|
151
|
+
----
|
|
152
|
+
|
|
153
|
+
[source,ruby]
|
|
154
|
+
----
|
|
155
|
+
# Navigate storage hierarchy
|
|
156
|
+
ole = Omnizip::Ole.open('document.doc')
|
|
157
|
+
|
|
158
|
+
# Access nested storage
|
|
159
|
+
pool = ole.root.find("ObjectPool")
|
|
160
|
+
if pool
|
|
161
|
+
pool.each do |entry|
|
|
162
|
+
puts "ObjectPool/#{entry.name}"
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
ole.close
|
|
167
|
+
----
|
|
168
|
+
|
|
169
|
+
== Summary Information
|
|
170
|
+
|
|
171
|
+
OLE documents contain standard metadata streams:
|
|
172
|
+
|
|
173
|
+
[source,ruby]
|
|
174
|
+
----
|
|
175
|
+
# Read summary information
|
|
176
|
+
ole = Omnizip::Ole.open('document.doc')
|
|
177
|
+
|
|
178
|
+
# Access summary stream
|
|
179
|
+
summary_stream = ole.open_stream("\x05SummaryInformation")
|
|
180
|
+
if summary_stream
|
|
181
|
+
# Summary information is in a specific binary format
|
|
182
|
+
# Omnizip provides helpers to decode it
|
|
183
|
+
summary = ole.summary_information
|
|
184
|
+
if summary
|
|
185
|
+
puts "Title: #{summary.title}"
|
|
186
|
+
puts "Author: #{summary.author}"
|
|
187
|
+
puts "Subject: #{summary.subject}"
|
|
188
|
+
puts "Keywords: #{summary.keywords}"
|
|
189
|
+
puts "Created: #{summary.create_time}"
|
|
190
|
+
puts "Modified: #{summary.last_save_time}"
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
ole.close
|
|
195
|
+
----
|
|
196
|
+
|
|
197
|
+
== File Format Detection
|
|
198
|
+
|
|
199
|
+
OLE files can be identified by their signature:
|
|
200
|
+
|
|
201
|
+
[source,ruby]
|
|
202
|
+
----
|
|
203
|
+
# Check if file is OLE
|
|
204
|
+
File.open('document.doc', 'rb') do |f|
|
|
205
|
+
header = f.read(8)
|
|
206
|
+
if Omnizip::Ole.ole_signature?(header)
|
|
207
|
+
puts "This is an OLE compound document"
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# Get OLE file type
|
|
212
|
+
ole = Omnizip::Ole.open('document.doc')
|
|
213
|
+
case ole.file_type
|
|
214
|
+
when :word_document
|
|
215
|
+
puts "Microsoft Word document"
|
|
216
|
+
when :excel_spreadsheet
|
|
217
|
+
puts "Microsoft Excel spreadsheet"
|
|
218
|
+
when :powerpoint
|
|
219
|
+
puts "Microsoft PowerPoint presentation"
|
|
220
|
+
when :msi_package
|
|
221
|
+
puts "Windows Installer package"
|
|
222
|
+
else
|
|
223
|
+
puts "Unknown OLE type: #{ole.file_type}"
|
|
224
|
+
end
|
|
225
|
+
ole.close
|
|
226
|
+
----
|
|
227
|
+
|
|
228
|
+
== Supported File Types
|
|
229
|
+
|
|
230
|
+
Omnizip supports reading the following OLE-based formats:
|
|
231
|
+
|
|
232
|
+
[cols="2,1,1"]
|
|
233
|
+
|===
|
|
234
|
+
|Format |Extension |Read Support
|
|
235
|
+
|
|
236
|
+
|Microsoft Word
|
|
237
|
+
|.doc
|
|
238
|
+
|Yes
|
|
239
|
+
|
|
240
|
+
|Microsoft Excel
|
|
241
|
+
|.xls
|
|
242
|
+
|Yes
|
|
243
|
+
|
|
244
|
+
|Microsoft PowerPoint
|
|
245
|
+
|.ppt
|
|
246
|
+
|Yes
|
|
247
|
+
|
|
248
|
+
|Windows Installer
|
|
249
|
+
|.msi
|
|
250
|
+
|Yes
|
|
251
|
+
|
|
252
|
+
|Microsoft Theme
|
|
253
|
+
|.thm
|
|
254
|
+
|Yes
|
|
255
|
+
|
|
256
|
+
|Outlook Message
|
|
257
|
+
|.msg
|
|
258
|
+
|Yes
|
|
259
|
+
|
|
260
|
+
|Generic OLE
|
|
261
|
+
|.ole
|
|
262
|
+
|Yes
|
|
263
|
+
|===
|
|
264
|
+
|
|
265
|
+
== Low-Level Access
|
|
266
|
+
|
|
267
|
+
For advanced use cases, you can access the OLE internals directly:
|
|
268
|
+
|
|
269
|
+
[source,ruby]
|
|
270
|
+
----
|
|
271
|
+
# Access BAT (Block Allocation Table)
|
|
272
|
+
ole = Omnizip::Ole.open('document.doc')
|
|
273
|
+
|
|
274
|
+
# Get sector size
|
|
275
|
+
puts "Sector size: #{ole.sector_size}"
|
|
276
|
+
|
|
277
|
+
# Access BAT entries
|
|
278
|
+
bat = ole.bat
|
|
279
|
+
puts "BAT entries: #{bat.size}"
|
|
280
|
+
|
|
281
|
+
# Access SBAT (Small Block Allocation Table)
|
|
282
|
+
sbat = ole.sbat
|
|
283
|
+
puts "SBAT entries: #{sbat.size}" if sbat
|
|
284
|
+
|
|
285
|
+
# Access directory entries
|
|
286
|
+
ole.root.each do |entry|
|
|
287
|
+
puts "#{entry.name}: starting_sector=#{entry.starting_sector}, size=#{entry.size}"
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
ole.close
|
|
291
|
+
----
|
|
292
|
+
|
|
293
|
+
== Performance Considerations
|
|
294
|
+
|
|
295
|
+
OLE files can be large. For efficient reading:
|
|
296
|
+
|
|
297
|
+
[source,ruby]
|
|
298
|
+
----
|
|
299
|
+
# Stream read large OLE files efficiently
|
|
300
|
+
Omnizip::Ole.open('large.msi') do |ole|
|
|
301
|
+
# Only read streams you need
|
|
302
|
+
summary = ole.open_stream("\x05SummaryInformation")
|
|
303
|
+
if summary
|
|
304
|
+
# Read in chunks for large streams
|
|
305
|
+
chunk_size = 65536
|
|
306
|
+
while chunk = summary.read(chunk_size)
|
|
307
|
+
process(chunk)
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
end
|
|
311
|
+
----
|
|
312
|
+
|
|
313
|
+
== See Also
|
|
314
|
+
|
|
315
|
+
* link:seven-zip-format.html[7z Format] - Can contain OLE files
|
|
316
|
+
* link:zip-format.html[ZIP Format] - Modern Office uses ZIP (docx, xlsx, pptx)
|