mpp_reader 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ module MppReader
2
+ module Blocks
3
# Fixed-size records (tasks, resources, assignments) carved out of a data
# block. By default item i spans from its FixedMeta offset up to the next
# item's offset (the last item runs to the end of the block), and every size
# is clamped to the bytes actually available. Ported from MPXJ FixedData.
#
# All three construction paths record the byte offset of every item they
# extract, so #index_from_offset works regardless of how the instance was
# built. Slots whose offset is missing/out of range, or whose size works out
# to zero, stay nil in both the item list and the offset list.
class FixedData
  attr_reader :item_count

  # Builds an instance for blocks whose meta offsets are unreliable but whose
  # record size is fixed and known (e.g. assignment data, 110 bytes per
  # record). The data is cut into consecutive item_size-byte records; a
  # trailing partial record is ignored.
  def self.fixed_size(data, item_size)
    allocate.tap { |instance| instance.send(:init_fixed_size, data, item_size) }
  end

  # Builds an instance whose items are located by meta offsets but have a
  # known fixed size that overrides the size implied by consecutive offsets
  # (e.g. link records, 20 bytes each).
  def self.meta_offsets_with_size(fixed_meta, data, item_size)
    allocate.tap do |instance|
      instance.send(:init_meta_offsets_with_size, fixed_meta, data, item_size)
    end
  end

  # fixed_meta must respond to #item_count and #item_offset(index).
  # max_expected_size, when non-zero, caps the size of every item.
  def initialize(fixed_meta, data, max_expected_size: 0)
    @item_count = fixed_meta.item_count
    @offsets = Array.new(@item_count)
    @items = Array.new(@item_count)
    total = data.bytesize

    @item_count.times do |i|
      offset = fixed_meta.item_offset(i)
      next if offset.nil? || offset.negative? || offset > total

      available = total - offset
      size = i + 1 == @item_count ? available : fixed_meta.item_offset(i + 1).to_i - offset
      # Offsets that run backwards or past the block cannot be trusted; fall
      # back to everything that is left in the block.
      size = available if size.negative? || size > available
      size = [size, max_expected_size].min unless max_expected_size.zero?
      next unless size.positive?

      @offsets[i] = offset
      @items[i] = data.byteslice(offset, size)
    end
  end

  # Raw bytes of the item at index, or nil when the slot is empty.
  def [](index) = @items[index]

  # Index of the record starting at the given byte offset, or nil.
  def index_from_offset(offset)
    @offsets.index(offset)
  end

  private

  def init_fixed_size(data, item_size)
    @item_count = data.bytesize / item_size
    @offsets = Array.new(@item_count) { |i| i * item_size }
    @items = @offsets.map { |offset| data.byteslice(offset, item_size) }
  end

  def init_meta_offsets_with_size(fixed_meta, data, item_size)
    @item_count = fixed_meta.item_count
    @offsets = Array.new(@item_count)
    @items = Array.new(@item_count)
    @item_count.times do |i|
      offset = fixed_meta.item_offset(i)
      next if offset.nil? || offset.negative? || offset > data.bytesize

      size = [item_size, data.bytesize - offset].min
      next unless size.positive?

      @offsets[i] = offset
      @items[i] = data.byteslice(offset, size)
    end
  end
end
82
+ end
83
+ end
@@ -0,0 +1,62 @@
1
+ module MppReader
2
+ module Blocks
3
# Index for a FixedData block. Layout (ported from MPXJ FixedMeta): a 16-byte
# header (magic 0xFADFADBA, unknown, item count, unknown) followed by
# fixed-size meta items. The item count in the header is unreliable, so the
# real count is derived from the block size. Bytes 4-7 of each meta item hold
# the corresponding item's offset within the FixedData block.
class FixedMeta
  MAGIC = 0xFADFADBA
  HEADER_SIZE = 16

  attr_reader :item_count, :header_item_count

  # Some blocks (e.g. Fixed2Meta) have a version-dependent item size. Picks
  # the candidate that divides the block evenly, preferring one whose item
  # count matches a sibling block's; otherwise the closest fit by MPXJ's rule
  # of thumb (header count * size must not exceed the available bytes, and
  # the candidate that comes nearest wins). Falls back to the first candidate.
  def self.with_derived_item_size(data, candidates, sibling_item_count)
    available = data.bytesize - HEADER_SIZE
    header_count = data.byteslice(8, 4).to_s.unpack1("V").to_i
    chosen = nil
    best_distance = nil
    candidates.each do |size|
      next unless (available % size).zero?

      if available / size == sibling_item_count
        chosen = size
        break
      end
      distance = header_count * size - available
      next unless distance <= 0 && (best_distance.nil? || distance > best_distance)

      chosen = size
      best_distance = distance
    end
    new(data, chosen || candidates.first)
  end

  # Raises CorruptFileError when the block does not start with MAGIC.
  # A block that carries the magic but is shorter than the full header is
  # treated as holding zero items (header_item_count reads as 0) instead of
  # failing on a nil slice or a negative item count.
  def initialize(data, item_size)
    magic = data.byteslice(0, 4).to_s.unpack1("V")
    unless magic == MAGIC
      raise CorruptFileError, format("bad FixedMeta magic 0x%08x", magic.to_i)
    end

    # The header count can be lower than the block-derived count when stale
    # records linger at the end; some readers must honour it.
    @header_item_count = data.byteslice(8, 4).to_s.unpack1("V").to_i
    payload_bytes = [data.bytesize - HEADER_SIZE, 0].max
    @item_count = payload_bytes / item_size
    @items = Array.new(@item_count) do |i|
      data.byteslice(HEADER_SIZE + i * item_size, item_size)
    end
  end

  # Raw bytes of the meta item at index, or nil.
  def [](index) = @items[index]

  # Offset of item index within the FixedData block (u32 at bytes 4-7 of the
  # meta item), or nil when the meta item is missing or too short.
  def item_offset(index)
    item = @items[index]
    item && item.byteslice(4, 4).to_s.unpack1("V")
  end
end
61
+ end
62
+ end
@@ -0,0 +1,47 @@
1
+ module MppReader
2
+ module Blocks
3
# A Props block: a map of integer keys to raw byte values, used for
# project-level settings and the field-map definitions. Layout (ported from
# MPXJ Props14): 16-byte header with the entry count as a u16 at offset 12,
# then entries of (size:u32, key:u32, unknown:u32, data[size]), each padded
# to a 2-byte boundary.
class Props
  # Parses as many entries as the header announces, stopping early at the
  # first entry that is empty or would run past the end of the block.
  def initialize(data)
    @map = {}
    count = data.byteslice(12, 2).to_s.unpack1("v").to_i
    pos = 16
    count.times do
      break if pos + 12 > data.bytesize

      size, key = data.byteslice(pos, 8).unpack("VV")
      pos += 12
      break if size < 1 || pos + size > data.bytesize

      @map[key] = data.byteslice(pos, size)
      pos += size + (size.odd? ? 1 : 0)
    end
  end

  # Raw bytes stored under key, or nil.
  def [](key) = @map[key]

  # Value under key read as a little-endian u32, or nil when absent or
  # shorter than 4 bytes.
  def int(key)
    bytes = @map[key]
    bytes && bytes.bytesize >= 4 ? bytes.unpack1("V") : nil
  end

  # Value under key read as a little-endian u16, or nil when absent or
  # shorter than 2 bytes.
  def short(key)
    bytes = @map[key]
    bytes && bytes.bytesize >= 2 ? bytes.unpack1("v") : nil
  end

  # Value under key decoded from UTF-16LE to UTF-8 and cut at the first NUL,
  # or nil when the key is absent.
  def unicode_string(key)
    bytes = @map[key]
    return nil unless bytes

    # Decode a copy: force_encoding mutates its receiver, and the stored
    # bytes must keep their original encoding for later #[] / #int readers.
    bytes.dup
         .force_encoding(Encoding::UTF_16LE)
         .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
         .sub(/\0.*\z/m, "")
  end
end
46
+ end
47
+ end
@@ -0,0 +1,39 @@
1
+ module MppReader
2
+ module Blocks
3
# Variable-length field values, indexed by a VarMeta block. Each item is a
# u32 size followed by the payload (ported from MPXJ Var2Data).
class Var2Data
  # var_meta must respond to #offset(unique_id, type); data is the raw block.
  def initialize(var_meta, data)
    @meta = var_meta
    @data = data
  end

  # Payload bytes for (unique_id, type), or nil when the meta has no such
  # entry or the size prefix / payload would run past the end of the block.
  def bytes(unique_id, type)
    offset = @meta.offset(unique_id, type)
    return nil if offset.nil?

    payload_start = offset + 4
    return nil if payload_start > @data.bytesize

    # "V" decodes an unsigned u32, so the size can never be negative; only
    # the upper bound needs checking.
    size = @data.byteslice(offset, 4).unpack1("V")
    return nil if payload_start + size > @data.bytesize

    @data.byteslice(payload_start, size)
  end

  # Single-byte NUL-terminated string (used by notes/RTF fields).
  def byte_string(unique_id, type)
    value = bytes(unique_id, type)
    value && cut_at_nul(value)
  end

  # UTF-16LE payload decoded to UTF-8 and cut at the first NUL.
  def string(unique_id, type)
    value = bytes(unique_id, type)
    return nil unless value

    # value is a fresh slice, so retagging it in place touches nothing shared.
    decoded = value.force_encoding(Encoding::UTF_16LE)
                   .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
    cut_at_nul(decoded)
  end

  private

  # Drops the first NUL and everything after it.
  def cut_at_nul(text)
    text.sub(/\0.*\z/m, "")
  end
end
38
+ end
39
+ end
@@ -0,0 +1,38 @@
1
+ module MppReader
2
+ module Blocks
3
# Index for a Var2Data block: maps (unique_id, type) to the offset of the
# item's payload. Layout (ported from MPXJ VarMeta12): 24-byte header (magic
# 0xFADFADBA or 0, unknown, item count, unknown x2, data size), then 12-byte
# entries of (unique_id:u32, offset:u32, type:u16, unknown:u16).
class VarMeta
  MAGIC = 0xFADFADBA

  # Raises CorruptFileError when a magic is present that is neither 0 nor
  # MAGIC. Reads at most the announced number of entries, and never more
  # than the whole 12-byte entries that fit after the header.
  def initialize(data)
    magic = data.byteslice(0, 4).to_s.unpack1("V")
    if magic && !magic.zero? && magic != MAGIC
      raise CorruptFileError, format("bad VarMeta magic 0x%08x", magic)
    end

    announced = data.byteslice(8, 4).to_s.unpack1("V").to_i
    fitting = [(data.bytesize - 24) / 12, 0].max
    @table = {}
    [announced, fitting].min.times do |n|
      unique_id, offset, type = data.byteslice(24 + n * 12, 12).unpack("VVv")
      (@table[unique_id] ||= {})[type] = offset
    end
  end

  # Every unique id that has at least one entry, ascending.
  def unique_ids = @table.keys.sort

  # True when at least one entry exists for unique_id.
  def entries?(unique_id) = @table.key?(unique_id)

  # Payload offset recorded for (unique_id, type), or nil.
  def offset(unique_id, type)
    @table.dig(unique_id, type)
  end
end
37
+ end
38
+ end
@@ -0,0 +1,19 @@
1
+ module MppReader
2
# Working-time definition.
#
# days maps :sunday..:saturday to { type:, hours: }, where type is :working,
# :non_working or :default (inherit from the base calendar) and hours is an
# array of [start_seconds, end_seconds] ranges within the day.
#
# exceptions is an array of { from:, to:, name:, hours: } overrides; empty
# hours means a non-working period.
class Calendar
  DAY_NAMES = %i[sunday monday tuesday wednesday thursday friday saturday].freeze

  attr_reader :days, :exceptions
  attr_accessor :unique_id, :name, :base_calendar_unique_id, :resource_unique_id

  # Starts out with no day definitions and no exceptions; the identifying
  # attributes stay nil until assigned.
  def initialize
    @exceptions = []
    @days = {}
  end
end
19
+ end
@@ -0,0 +1,84 @@
1
+ module MppReader
2
+ module Cfbf
3
# One 128-byte directory entry: a storage (folder), stream (file), or root.
class Entry
  TYPE_STORAGE = 1
  TYPE_STREAM = 2
  TYPE_ROOT = 5

  attr_reader :name, :type, :left, :right, :child, :start_sector, :size
  attr_accessor :children

  # Decodes the fields this reader uses from a raw directory record: the
  # name, the type byte at 66, the left/right/child indexes at 68, the start
  # sector at 116 and the 64-bit size at 120. children starts out empty.
  def initialize(record)
    @name = decode_name(record)
    @type = record.getbyte(66)
    @left, @right, @child = record.byteslice(68, 12).unpack("V3")
    @start_sector = record.byteslice(116, 4).unpack1("V")
    @size = record.byteslice(120, 8).unpack1("Q<")
    @children = {}
  end

  # True for storages and for the root entry.
  def storage? = [TYPE_STORAGE, TYPE_ROOT].include?(@type)

  # True for stream entries.
  def stream? = @type == TYPE_STREAM

  private

  # The u16 at offset 64 is the name length in bytes including the 2-byte
  # terminator; it is capped at the 64-byte name field. Lengths below 2
  # yield an empty name.
  def decode_name(record)
    byte_len = [record.byteslice(64, 2).unpack1("v"), 64].min
    return "" if byte_len < 2

    record.byteslice(0, byte_len - 2)
          .force_encoding(Encoding::UTF_16LE)
          .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
  end
end
32
+
33
# Parses the directory sector chain and links each storage's sibling tree
# (left/right indexes) into a flat children hash keyed by upcased name.
class Directory
  attr_reader :root

  # dir_bytes is the concatenated directory stream. Records whose type byte
  # (offset 66) is zero are kept as nil placeholders so indexes stay aligned.
  # Raises CorruptFileError when entry 0 is not a root entry.
  def initialize(dir_bytes)
    @entries = Array.new(dir_bytes.bytesize / 128) do |i|
      record = dir_bytes.byteslice(i * 128, 128)
      Entry.new(record) unless record.getbyte(66).to_i.zero?
    end
    @root = @entries.first
    unless @root&.type == Entry::TYPE_ROOT
      raise CorruptFileError, "compound file has no root directory entry"
    end
    link_all_children
  end

  private

  # Breadth-first walk over [owning_storage, entry_index] pairs with one
  # visited table shared by every storage, so an index reached twice raises
  # CorruptFileError whether the repeat comes from a sibling-tree cycle or
  # from another storage pointing back at it. NOSTREAM links are skipped.
  def link_all_children
    seen = {}
    pending = [[@root, @root.child]]

    until pending.empty?
      owner, index = pending.shift
      next if index == NOSTREAM
      raise CorruptFileError, "directory entry cycle at index #{index}" if seen.key?(index)

      seen[index] = true
      node = @entries[index]
      raise CorruptFileError, "directory references missing entry #{index}" if node.nil?

      owner.children[node.name.upcase] = node

      # Siblings belong to the same owner; a storage's own child link opens
      # a new level owned by that storage.
      pending << [owner, node.left] << [owner, node.right]
      pending << [node, node.child] if node.storage?
    end
  end
end
83
+ end
84
+ end
@@ -0,0 +1,75 @@
1
+ module MppReader
2
+ module Cfbf
3
# The File Allocation Table: maps each sector index to the next sector in its
# chain. Built from the header DIFAT plus chained DIFAT sectors.
class Fat
  # data is the whole file; header must respond to #sector_size, #difat_head
  # and #first_difat_sector. The table is built eagerly, so a broken DIFAT
  # raises CorruptFileError from here.
  def initialize(data, header)
    @data = data
    @header = header
    @entries = build_entries
  end

  # Bytes of one sector. Sector n starts at (n + 1) * sector_size, i.e. the
  # first sector-sized slot of the file is not addressable as a sector. A
  # sector that starts at or past the end of the file raises; one that is
  # merely cut short is returned as is.
  def sector_bytes(sector)
    width = @header.sector_size
    bytes = @data.byteslice((sector + 1) * width, width)
    return bytes unless bytes.nil? || bytes.empty?

    raise CorruptFileError, "sector #{sector} beyond end of file"
  end

  # Sector indexes of the chain beginning at start, in order. Raises
  # CorruptFileError on a revisited sector or an index outside the table.
  def chain(start)
    visited = {}
    current = start
    until current == ENDOFCHAIN
      raise CorruptFileError, "FAT chain cycle at sector #{current}" if visited.key?(current)
      unless current < @entries.size
        raise CorruptFileError, "FAT chain references invalid sector #{current}"
      end

      visited[current] = true
      current = @entries[current]
    end
    # Hash keys keep insertion order, which is the order the chain was walked.
    visited.keys
  end

  # Concatenated bytes of every sector in the chain beginning at start.
  def read_chain(start)
    chain(start).map { |index| sector_bytes(index) }.join
  end

  private

  # Collects the FAT sector numbers (header DIFAT first, then each chained
  # DIFAT sector, whose last u32 links to the next one) and flattens those
  # sectors into a single array of next-sector entries.
  def build_entries
    sector_limit = @data.bytesize / @header.sector_size
    fat_sectors = @header.difat_head.reject { |s| s == FREESECT }
    visited = {}
    cursor = @header.first_difat_sector

    until [ENDOFCHAIN, FREESECT].include?(cursor)
      raise CorruptFileError, "DIFAT sector cycle at #{cursor}" if visited[cursor]

      visited[cursor] = true
      # One table entry per DIFAT sector walked so far.
      raise CorruptFileError, "DIFAT chain exceeds file size" if visited.size > sector_limit

      raw = sector_bytes(cursor)
      if raw.bytesize < @header.sector_size
        raise CorruptFileError, "DIFAT sector #{cursor} is truncated"
      end

      *listed, following = raw.unpack("V*")
      fat_sectors.concat(listed.reject { |s| s == FREESECT })
      # A file cannot have more FAT sectors than it has sectors.
      raise CorruptFileError, "FAT sector list exceeds file size" if fat_sectors.size > sector_limit

      cursor = following
    end

    fat_sectors.flat_map { |s| sector_bytes(s).unpack("V*") }
  end
end
74
+ end
75
+ end
@@ -0,0 +1,114 @@
1
+ module MppReader
2
+ module Cfbf
3
# The assembled compound file: directory tree plus stream extraction.
# Streams smaller than the mini-stream cutoff (4096) live in the ministream
# and are chained through the miniFAT; larger streams are chained directly
# through the FAT.
#
# Every extraction path raises CorruptFileError when a chain delivers fewer
# bytes than the directory entry declares, rather than returning a silently
# shortened stream.
class File
  attr_reader :root

  # Accepts a filesystem path, a binary String of file content, or an IO.
  # Strings beginning with the OLE signature are treated as content; all
  # other Strings are treated as filesystem paths. A String that is neither
  # content nor usable as a path raises InvalidFormatError; any other source
  # that does not respond to #read raises ArgumentError.
  def self.read(source)
    data =
      if source.is_a?(::String)
        b = source.b
        if b.byteslice(0, 8) == Header::SIGNATURE
          b
        else
          begin
            ::File.binread(source)
          rescue Errno::ENAMETOOLONG, Errno::EINVAL, ArgumentError
            raise InvalidFormatError, "not an OLE2 file and not a readable path"
          end
        end
      elsif source.respond_to?(:read)
        source.read.b
      else
        raise ArgumentError, "cannot read compound file from #{source.class}"
      end
    new(data)
  end

  # Parses the header, FAT and directory, then loads the root ministream and
  # the miniFAT. Raises InvalidFormatError for input shorter than 512 bytes
  # and CorruptFileError when the root ministream cannot be read in full.
  def initialize(data)
    @data = data.encoding == Encoding::BINARY ? data : data.b
    raise InvalidFormatError, "file too small to be an OLE2 file" if @data.bytesize < 512

    @header = Header.new(@data.byteslice(0, 512))
    @fat = Fat.new(@data, @header)
    @directory = Directory.new(@fat.read_chain(@header.first_dir_sector))
    @root = @directory.root
    @mini_stream = load_mini_stream
    @minifat =
      if @header.num_minifat_sectors.positive? && @header.first_minifat_sector != ENDOFCHAIN
        @fat.read_chain(@header.first_minifat_sector).unpack("V*")
      else
        []
      end
  end

  # Path components separated by "/", matched case-insensitively.
  # Returns the directory entry or nil.
  def entry(path)
    current = @root
    path.split("/").each do |part|
      current = current.children[part.upcase]
      return nil unless current
    end
    current
  end

  # Content of the stream at path, or nil when the path does not resolve to
  # a stream entry.
  def stream(path)
    e = entry(path)
    e&.stream? ? read_stream(e) : nil
  end

  # Content of the given stream entry, exactly entry.size bytes long.
  # Raises CorruptFileError when the declared size exceeds the file size or
  # the sector chain cannot supply that many bytes.
  def read_stream(entry)
    return "".b if entry.size.zero?
    if entry.size > @data.bytesize
      raise CorruptFileError, "stream entry size #{entry.size} exceeds file size"
    end
    return read_mini_stream(entry) if entry.size < @header.mini_stream_cutoff

    chain_bytes = @fat.read_chain(entry.start_sector)
    if chain_bytes.bytesize < entry.size
      raise CorruptFileError, "FAT stream chain shorter than declared size"
    end
    chain_bytes.byteslice(0, entry.size)
  end

  private

  # The root entry's stream is the container for all mini sectors.
  def load_mini_stream
    return "".b if @root.size.zero?
    if @root.size > @data.bytesize
      raise CorruptFileError, "root ministream size #{@root.size} exceeds file size"
    end

    chain_bytes = @fat.read_chain(@root.start_sector)
    if chain_bytes.bytesize < @root.size
      raise CorruptFileError, "root ministream chain shorter than declared size"
    end
    chain_bytes.byteslice(0, @root.size)
  end

  # Follows the miniFAT chain of entry through the ministream.
  def read_mini_stream(entry)
    out = +"".b
    size = @header.mini_sector_size
    sector = entry.start_sector
    seen = {}
    while sector != ENDOFCHAIN
      if seen[sector] || sector >= @minifat.size
        raise CorruptFileError, "broken miniFAT chain at #{sector}"
      end
      seen[sector] = true
      slice = @mini_stream.byteslice(sector * size, size)
      # byteslice returns "" (not nil) for a sector that starts exactly at
      # the end of the ministream; that sector is just as absent.
      if slice.nil? || slice.empty?
        raise CorruptFileError, "mini-stream sector #{sector} out of range"
      end
      out << slice
      sector = @minifat[sector]
    end
    # Same guarantee as the FAT path: never hand back a shortened stream.
    if out.bytesize < entry.size
      raise CorruptFileError, "miniFAT stream chain shorter than declared size"
    end
    out.byteslice(0, entry.size)
  end
end
113
+ end
114
+ end
@@ -0,0 +1,40 @@
1
+ module MppReader
2
+ # Reader for the OLE2 / Compound File Binary Format ([MS-CFB]) used as the
3
+ # container of .mpp files. Vendored from msg-extractor-ruby (same author).
4
+ module Cfbf
5
+ FREESECT = 0xFFFFFFFF
6
+ ENDOFCHAIN = 0xFFFFFFFE
7
+ FATSECT = 0xFFFFFFFD
8
+ DIFSECT = 0xFFFFFFFC
9
+ NOSTREAM = 0xFFFFFFFF
10
+
11
+ class Header
12
+ SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b.freeze
13
+
14
+ attr_reader :sector_size, :mini_sector_size, :num_fat_sectors,
15
+ :first_dir_sector, :mini_stream_cutoff,
16
+ :first_minifat_sector, :num_minifat_sectors,
17
+ :first_difat_sector, :num_difat_sectors, :difat_head
18
+
19
+ def initialize(bytes)
20
+ unless bytes && bytes.bytesize >= 512 && bytes.byteslice(0, 8) == SIGNATURE
21
+ raise InvalidFormatError, "not an OLE2 compound file (bad signature)"
22
+ end
23
+ sector_shift = bytes.byteslice(30, 2).unpack1("v")
24
+ mini_shift = bytes.byteslice(32, 2).unpack1("v")
25
+ unless sector_shift == 9 || sector_shift == 12
26
+ raise InvalidFormatError, "invalid sector shift #{sector_shift} (must be 9 or 12)"
27
+ end
28
+ unless mini_shift == 6
29
+ raise InvalidFormatError, "invalid mini sector shift #{mini_shift} (must be 6)"
30
+ end
31
+ @sector_size = 1 << sector_shift
32
+ @mini_sector_size = 1 << mini_shift
33
+ @num_fat_sectors, @first_dir_sector, _txn_sig, @mini_stream_cutoff,
34
+ @first_minifat_sector, @num_minifat_sectors,
35
+ @first_difat_sector, @num_difat_sectors = bytes.byteslice(44, 32).unpack("V8")
36
+ @difat_head = bytes.byteslice(76, 436).unpack("V109")
37
+ end
38
+ end
39
+ end
40
+ end