RubyGems - mpp_reader - Versions diffs - 0.1.0 - Mend

mpp_reader 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +36 -0
data/LICENSE.txt +504 -0
data/README.md +77 -0
data/exe/mpp_reader +5 -0
data/lib/mpp_reader/assignment.rb +8 -0
data/lib/mpp_reader/blocks/fixed_data.rb +83 -0
data/lib/mpp_reader/blocks/fixed_meta.rb +62 -0
data/lib/mpp_reader/blocks/props.rb +47 -0
data/lib/mpp_reader/blocks/var2_data.rb +39 -0
data/lib/mpp_reader/blocks/var_meta.rb +38 -0
data/lib/mpp_reader/calendar.rb +19 -0
data/lib/mpp_reader/cfbf/directory.rb +84 -0
data/lib/mpp_reader/cfbf/fat.rb +75 -0
data/lib/mpp_reader/cfbf/file.rb +114 -0
data/lib/mpp_reader/cfbf/header.rb +40 -0
data/lib/mpp_reader/cli.rb +110 -0
data/lib/mpp_reader/comp_obj.rb +37 -0
data/lib/mpp_reader/decode.rb +105 -0
data/lib/mpp_reader/errors.rb +13 -0
data/lib/mpp_reader/field_map.rb +106 -0
data/lib/mpp_reader/field_reader.rb +119 -0
data/lib/mpp_reader/field_tables.rb +4156 -0
data/lib/mpp_reader/project.rb +70 -0
data/lib/mpp_reader/reader14.rb +455 -0
data/lib/mpp_reader/relation.rb +11 -0
data/lib/mpp_reader/resource.rb +5 -0
data/lib/mpp_reader/rtf_text.rb +92 -0
data/lib/mpp_reader/task.rb +21 -0
data/lib/mpp_reader/version.rb +3 -0
data/lib/mpp_reader.rb +66 -0
metadata +77 -0

data/lib/mpp_reader/blocks/fixed_data.rb ADDED Viewed

@@ -0,0 +1,83 @@
module MppReader
  module Blocks
    # Fixed-size records (tasks, resources, assignments), located by the
    # offsets in a FixedMeta block: item i spans from its offset to the next
    # item's offset (the last item runs to the end of the block). Ported from
    # MPXJ FixedData; sizes are clamped to the available bytes.
    #
    # Every constructor records the byte offset of each stored item, so
    # index_from_offset works no matter how the instance was built.
    class FixedData
      # Number of slots. A slot is nil when its offset or size was unusable.
      attr_reader :item_count

      # For blocks whose meta offsets are unreliable but whose record size
      # is fixed and known (e.g. assignment data, 110 bytes per record).
      # Bytes left over after the last whole record are ignored.
      def self.fixed_size(data, item_size)
        instance = allocate
        instance.send(:init_fixed_size, data, item_size)
        instance
      end

      # Items located by meta offsets but with a known fixed size,
      # overriding the size implied by consecutive offsets (e.g. link
      # records, 20 bytes each).
      def self.meta_offsets_with_size(fixed_meta, data, item_size)
        instance = allocate
        instance.send(:init_meta_offsets_with_size, fixed_meta, data, item_size)
        instance
      end

      # fixed_meta        - responds to item_count and item_offset(index).
      # data              - the raw block bytes.
      # max_expected_size - upper bound for a single item; 0 means no bound.
      def initialize(fixed_meta, data, max_expected_size: 0)
        @item_count = fixed_meta.item_count
        @offsets = Array.new(@item_count)
        @items = Array.new(@item_count)
        @item_count.times do |i|
          offset = fixed_meta.item_offset(i)
          next if offset.nil? || offset.negative? || offset > data.bytesize

          size = clamped_size(fixed_meta, data, i, offset, max_expected_size)
          next unless size.positive?

          # Offsets are recorded only for slots that actually hold an item,
          # mirroring init_meta_offsets_with_size.
          @offsets[i] = offset
          @items[i] = data.byteslice(offset, size)
        end
      end

      # Raw bytes of the item at index, or nil for an empty slot.
      def [](index) = @items[index]

      # Index of the record starting at the given byte offset, or nil.
      def index_from_offset(offset)
        # A nil offset must not match the nil placeholder of an empty slot.
        return nil if offset.nil?

        @offsets.index(offset)
      end

      private

      # Size of item `index` starting at `offset`: the distance to the next
      # item's offset (or to the end of the block for the last item), clamped
      # to the bytes available and to max_expected_size when that is non-zero.
      def clamped_size(fixed_meta, data, index, offset, max_expected_size)
        available = data.bytesize - offset
        size = if index + 1 == @item_count
                 available
               else
                 fixed_meta.item_offset(index + 1).to_i - offset
               end
        # A negative or oversized span means the next offset is unusable.
        if size.negative? || size > available
          size = max_expected_size.zero? ? available : [max_expected_size, available].min
        end
        size = max_expected_size if !max_expected_size.zero? && size > max_expected_size
        size
      end

      def init_fixed_size(data, item_size)
        @item_count = data.bytesize / item_size
        @offsets = Array.new(@item_count) { |i| i * item_size }
        @items = Array.new(@item_count) { |i| data.byteslice(i * item_size, item_size) }
      end

      def init_meta_offsets_with_size(fixed_meta, data, item_size)
        @item_count = fixed_meta.item_count
        @offsets = Array.new(@item_count)
        @items = Array.new(@item_count)
        @item_count.times do |i|
          offset = fixed_meta.item_offset(i)
          next if offset.nil? || offset.negative? || offset > data.bytesize

          size = [item_size, data.bytesize - offset].min
          next unless size.positive?

          @offsets[i] = offset
          @items[i] = data.byteslice(offset, size)
        end
      end
    end
  end
end

data/lib/mpp_reader/blocks/fixed_meta.rb ADDED Viewed

@@ -0,0 +1,62 @@
module MppReader
  module Blocks
    # Index for a FixedData block. Layout (ported from MPXJ FixedMeta):
    # 16-byte header (magic 0xFADFADBA, unknown, item count, unknown), then
    # fixed-size meta items. The item count in the header is unreliable; the
    # real count is derived from the block size. Bytes 4-7 of each meta item
    # hold the corresponding item's offset within the FixedData block.
    class FixedMeta
      MAGIC = 0xFADFADBA
      HEADER_SIZE = 16

      # item_count is derived from the block size; header_item_count is the
      # raw value stored in the header.
      attr_reader :item_count, :header_item_count

      # Some blocks (e.g. Fixed2Meta) have a version-dependent item size.
      # Pick the candidate that divides the block evenly, preferring one
      # whose item count matches a sibling block's; otherwise the closest
      # fit by MPXJ's rule of thumb (header count * size vs available).
      # Falls back to the first candidate when none divides the block evenly.
      def self.with_derived_item_size(data, candidates, sibling_item_count)
        available = data.bytesize - HEADER_SIZE
        header_count = data.byteslice(8, 4).to_s.unpack1("V").to_i
        chosen = nil
        best_distance = nil
        candidates.each do |size|
          next unless (available % size).zero?

          if available / size == sibling_item_count
            chosen = size
            break
          end
          # distance <= 0 means the header count fits inside the block; the
          # largest such distance (closest to zero) is the tightest fit.
          distance = header_count * size - available
          if distance <= 0 && (best_distance.nil? || distance > best_distance)
            chosen = size
            best_distance = distance
          end
        end
        new(data, chosen || candidates.first)
      end

      # data      - raw bytes of the meta block.
      # item_size - size in bytes of each meta item.
      #
      # Raises CorruptFileError when the magic number is wrong or the block
      # is shorter than its own 16-byte header.
      def initialize(data, item_size)
        magic = data.byteslice(0, 4).to_s.unpack1("V")
        unless magic == MAGIC
          raise CorruptFileError, format("bad FixedMeta magic 0x%08x", magic.to_i)
        end
        # Without this check a truncated block fails further down with a
        # NoMethodError or a "negative array size" ArgumentError.
        if data.bytesize < HEADER_SIZE
          raise CorruptFileError,
                format("FixedMeta block truncated (%d bytes, header needs %d)",
                       data.bytesize, HEADER_SIZE)
        end
        # The header count can be lower than the block-derived count when
        # stale records linger at the end; some readers must honour it.
        @header_item_count = data.byteslice(8, 4).unpack1("V")
        @item_count = (data.bytesize - HEADER_SIZE) / item_size
        @items = Array.new(@item_count) do |i|
          data.byteslice(HEADER_SIZE + i * item_size, item_size)
        end
      end

      # Raw bytes of the meta item at index, or nil when out of range.
      def [](index) = @items[index]

      # Offset (u32 at bytes 4-7 of the meta item) of item `index` within the
      # FixedData block; nil when the meta item is missing or too short.
      def item_offset(index)
        item = @items[index]
        item && item.byteslice(4, 4).to_s.unpack1("V")
      end
    end
  end
end

data/lib/mpp_reader/blocks/props.rb ADDED Viewed

@@ -0,0 +1,47 @@
module MppReader
  module Blocks
    # A Props block: a map of integer keys to raw byte values, used for
    # project-level settings and the field-map definitions. Layout (ported
    # from MPXJ Props14): 16-byte header with the entry count as a u16 at
    # offset 12, then entries of (size:u32, key:u32, unknown:u32, data[size]),
    # each padded to a 2-byte boundary.
    class Props
      # Parses as many entries as the header declares, stopping early at the
      # first entry that is empty or would run past the end of the block.
      def initialize(data)
        @map = {}
        count = data.byteslice(12, 2).to_s.unpack1("v").to_i
        pos = 16
        count.times do
          break if pos + 12 > data.bytesize

          size, key = data.byteslice(pos, 8).unpack("VV")
          pos += 12
          break if size < 1 || pos + size > data.bytesize

          @map[key] = data.byteslice(pos, size)
          pos += size + (size.odd? ? 1 : 0)
        end
      end

      # Raw bytes stored under key, or nil.
      def [](key) = @map[key]

      # Little-endian u32 from the first four bytes of the value, or nil when
      # the key is absent or the value is shorter than four bytes.
      def int(key)
        bytes = @map[key]
        bytes && bytes.bytesize >= 4 ? bytes.unpack1("V") : nil
      end

      # Little-endian u16 from the first two bytes of the value, or nil when
      # the key is absent or the value is shorter than two bytes.
      def short(key)
        bytes = @map[key]
        bytes && bytes.bytesize >= 2 ? bytes.unpack1("v") : nil
      end

      # Value decoded as UTF-16LE into UTF-8, cut at the first NUL; nil when
      # the key is absent.
      def unicode_string(key)
        bytes = @map[key]
        return nil unless bytes

        # force_encoding mutates its receiver, so decode a copy: the stored
        # value must keep its original encoding for later [] / int / short.
        bytes.dup
             .force_encoding(Encoding::UTF_16LE)
             .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
             .sub(/\0.*\z/m, "")
      end
    end
  end
end

data/lib/mpp_reader/blocks/var2_data.rb ADDED Viewed

@@ -0,0 +1,39 @@
module MppReader
  module Blocks
    # Accessor for variable-length field payloads. A VarMeta-style index
    # supplies the offset of each item; an item is a u32 length followed by
    # that many payload bytes (ported from MPXJ Var2Data).
    class Var2Data
      # var_meta - responds to offset(unique_id, type).
      # data     - the raw block bytes.
      def initialize(var_meta, data)
        @meta = var_meta
        @data = data
      end

      # Payload bytes for (unique_id, type); nil when the item is not indexed
      # or does not fit inside the block.
      def bytes(unique_id, type)
        start = @meta.offset(unique_id, type)
        return nil if start.nil?

        payload_start = start + 4
        return nil if payload_start > @data.bytesize

        length = @data.byteslice(start, 4).unpack1("V")
        return nil if length.negative?
        return nil if payload_start + length > @data.bytesize

        @data.byteslice(payload_start, length)
      end

      # Single-byte NUL-terminated string (used by notes/RTF fields).
      def byte_string(unique_id, type)
        raw = bytes(unique_id, type)
        raw.nil? ? nil : cut_at_nul(raw)
      end

      # Payload decoded as UTF-16LE into UTF-8, cut at the first NUL.
      def string(unique_id, type)
        raw = bytes(unique_id, type)
        return nil if raw.nil?

        utf16 = raw.force_encoding(Encoding::UTF_16LE)
        cut_at_nul(utf16.encode(Encoding::UTF_8, invalid: :replace, undef: :replace))
      end

      private

      # Drops the first NUL and everything after it.
      def cut_at_nul(text)
        text.sub(/\0.*\z/m, "")
      end
    end
  end
end

data/lib/mpp_reader/blocks/var_meta.rb ADDED Viewed

@@ -0,0 +1,38 @@
module MppReader
  module Blocks
    # Index for a Var2Data block: maps (unique_id, type) to the offset of the
    # item's payload. Layout (ported from MPXJ VarMeta12): 24-byte header
    # (magic 0xFADFADBA or 0, unknown, item count, unknown x2, data size),
    # then 12-byte entries of (unique_id:u32, offset:u32, type:u16, unknown:u16).
    class VarMeta
      MAGIC = 0xFADFADBA

      # Reads up to the declared number of entries, stopping at the first one
      # that would run past the end of the block. Raises CorruptFileError when
      # the magic is neither 0 nor MAGIC.
      def initialize(data)
        signature = data.byteslice(0, 4).to_s.unpack1("V")
        if !signature.nil? && !signature.zero? && signature != MAGIC
          raise CorruptFileError, format("bad VarMeta magic 0x%08x", signature)
        end

        declared = data.byteslice(8, 4).to_s.unpack1("V").to_i
        @table = Hash.new { |table, uid| table[uid] = {} }
        declared.times do |n|
          cursor = 24 + n * 12
          break if cursor + 12 > data.bytesize

          uid, payload_offset, kind = data.byteslice(cursor, 12).unpack("VVv")
          @table[uid][kind] = payload_offset
        end
      end

      # All indexed unique ids in ascending order.
      def unique_ids
        @table.keys.sort
      end

      # True when at least one entry exists for unique_id.
      def entries?(unique_id)
        @table.key?(unique_id)
      end

      # Offset recorded for (unique_id, type), or nil.
      def offset(unique_id, type)
        # key? avoids the auto-creating default block of @table.
        return nil unless @table.key?(unique_id)

        @table[unique_id][type]
      end
    end
  end
end

data/lib/mpp_reader/calendar.rb ADDED Viewed

@@ -0,0 +1,19 @@
module MppReader
  # Working-time definition.
  #
  # days maps a day name (:sunday..:saturday) to { type:, hours: }. type is
  # :working, :non_working or :default (inherit from the base calendar);
  # hours is an array of [start_seconds, end_seconds] ranges within the day.
  #
  # exceptions is an array of { from:, to:, name:, hours: } overrides; empty
  # hours means a non-working period.
  class Calendar
    DAY_NAMES = [
      :sunday, :monday, :tuesday, :wednesday, :thursday, :friday, :saturday
    ].freeze

    attr_accessor :unique_id, :name
    attr_accessor :base_calendar_unique_id, :resource_unique_id
    attr_reader :days, :exceptions

    # Starts with no day definitions and no exceptions.
    def initialize
      @days, @exceptions = {}, []
    end
  end
end

data/lib/mpp_reader/cfbf/directory.rb ADDED Viewed

@@ -0,0 +1,84 @@
module MppReader
  module Cfbf
    # A single 128-byte directory record describing a storage (folder), a
    # stream (file) or the root storage.
    class Entry
      TYPE_STORAGE = 1
      TYPE_STREAM = 2
      TYPE_ROOT = 5

      attr_reader :name, :type, :left, :right, :child, :start_sector, :size
      attr_accessor :children

      # record - the raw bytes of one directory record.
      def initialize(record)
        @name = decode_name(record)
        @type = record.getbyte(66)
        @left, @right, @child = record.byteslice(68, 12).unpack("V3")
        @start_sector = record.byteslice(116, 4).unpack1("V")
        @size = record.byteslice(120, 8).unpack1("Q<")
        @children = {}
      end

      def storage? = [TYPE_STORAGE, TYPE_ROOT].include?(@type)

      def stream? = @type == TYPE_STREAM

      private

      # The u16 at offset 64 is the name length in bytes, including the
      # two-byte terminator; it is capped at the 64-byte name field.
      def decode_name(record)
        byte_length = [record.byteslice(64, 2).unpack1("v"), 64].min
        return "" if byte_length < 2

        record.byteslice(0, byte_length - 2)
              .force_encoding(Encoding::UTF_16LE)
              .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
      end
    end

    # Parses the directory sector chain and flattens every storage's sibling
    # tree into that storage's children hash (keyed by upcased name).
    class Directory
      attr_reader :root

      # dir_bytes - the concatenated directory sectors. Records whose type
      # byte is zero are kept as nil placeholders so indexes stay aligned.
      def initialize(dir_bytes)
        @entries = Array.new(dir_bytes.bytesize / 128) do |slot|
          raw = dir_bytes.byteslice(slot * 128, 128)
          Entry.new(raw) unless raw.getbyte(66).to_i.zero?
        end
        @root = @entries.first
        unless @root&.type == Entry::TYPE_ROOT
          raise CorruptFileError, "compound file has no root directory entry"
        end

        link_all_children
      end

      private

      # Breadth-first walk over [owning_storage, entry_index] pairs using one
      # visited set for the whole directory. An index reached twice raises
      # CorruptFileError, which covers both sibling-tree cycles and
      # references back into another storage.
      def link_all_children
        seen = {}
        pending = []
        enqueue = lambda do |owner, slot|
          pending << [owner, slot] unless slot == NOSTREAM
        end

        enqueue.call(@root, @root.child)
        until pending.empty?
          owner, slot = pending.shift
          raise CorruptFileError, "directory entry cycle at index #{slot}" if seen.key?(slot)

          seen[slot] = true
          node = @entries[slot]
          raise CorruptFileError, "directory references missing entry #{slot}" if node.nil?

          owner.children[node.name.upcase] = node
          enqueue.call(owner, node.left)
          enqueue.call(owner, node.right)
          enqueue.call(node, node.child) if node.storage?
        end
      end
    end
  end
end

data/lib/mpp_reader/cfbf/fat.rb ADDED Viewed

@@ -0,0 +1,75 @@
module MppReader
  module Cfbf
    # The File Allocation Table: for every sector index, the index of the
    # next sector in its chain. The list of FAT sectors comes from the DIFAT
    # entries in the header plus any chained DIFAT sectors.
    class Fat
      # data   - the whole compound file as bytes.
      # header - responds to sector_size, difat_head and first_difat_sector.
      def initialize(data, header)
        @data = data
        @header = header
        @entries = build_entries
      end

      # Bytes of the given sector. Sector 0 starts one sector_size into the
      # file. Raises CorruptFileError when nothing can be read there.
      def sector_bytes(sector)
        width = @header.sector_size
        raw = @data.byteslice((sector + 1) * width, width)
        raise CorruptFileError, "sector #{sector} beyond end of file" if raw.nil? || raw.empty?

        raw
      end

      # Sector indexes of the chain beginning at start, in order. Raises
      # CorruptFileError on a cycle or on an index outside the table.
      def chain(start)
        visited = {}
        ordered = []
        current = start
        until current == ENDOFCHAIN
          raise CorruptFileError, "FAT chain cycle at sector #{current}" if visited.key?(current)
          unless current < @entries.size
            raise CorruptFileError, "FAT chain references invalid sector #{current}"
          end

          visited[current] = true
          ordered << current
          current = @entries[current]
        end
        ordered
      end

      # Concatenated bytes of every sector in the chain beginning at start.
      def read_chain(start)
        chain(start).map { |index| sector_bytes(index) }.join
      end

      private

      # Collects the FAT sector list (header DIFAT first, then each chained
      # DIFAT sector, whose last u32 links to the next one) and decodes those
      # sectors into one flat array of u32 entries.
      def build_entries
        sector_limit = @data.bytesize / @header.sector_size
        fat_sector_ids = @header.difat_head - [FREESECT]
        visited = {}
        current = @header.first_difat_sector

        until [ENDOFCHAIN, FREESECT].include?(current)
          raise CorruptFileError, "DIFAT sector cycle at #{current}" if visited.key?(current)

          visited[current] = true
          # visited.size is the number of DIFAT sectors walked so far.
          raise CorruptFileError, "DIFAT chain exceeds file size" if visited.size > sector_limit

          raw = sector_bytes(current)
          if raw.bytesize < @header.sector_size
            raise CorruptFileError, "DIFAT sector #{current} is truncated"
          end

          *listed, following = raw.unpack("V*")
          fat_sector_ids.concat(listed - [FREESECT])
          # There cannot be more FAT sectors than sectors in the file.
          if fat_sector_ids.size > sector_limit
            raise CorruptFileError, "FAT sector list exceeds file size"
          end

          current = following
        end

        fat_sector_ids.flat_map { |index| sector_bytes(index).unpack("V*") }
      end
    end
  end
end

data/lib/mpp_reader/cfbf/file.rb ADDED Viewed

@@ -0,0 +1,114 @@
module MppReader
  module Cfbf
    # The assembled compound file: directory tree plus stream extraction.
    # A stream smaller than the header's mini-stream cutoff is read from the
    # ministream through the miniFAT; any other stream is read directly
    # through the FAT.
    class File
      attr_reader :root

      # Accepts a filesystem path, a binary String of file content, or an IO.
      # Strings beginning with the OLE signature are treated as content;
      # all other Strings are treated as filesystem paths.
      def self.read(source)
        bytes =
          if source.is_a?(::String)
            bytes_from_string(source)
          elsif source.respond_to?(:read)
            source.read.b
          else
            raise ArgumentError, "cannot read compound file from #{source.class}"
          end
        new(bytes)
      end

      # Content when the String starts with the OLE signature, otherwise the
      # bytes of the file at that path. Path errors that mean "this is not a
      # usable path" become InvalidFormatError.
      def self.bytes_from_string(source)
        binary = source.b
        return binary if binary.byteslice(0, 8) == Header::SIGNATURE

        ::File.binread(source)
      rescue Errno::ENAMETOOLONG, Errno::EINVAL, ArgumentError
        raise InvalidFormatError, "not an OLE2 file and not a readable path"
      end
      private_class_method :bytes_from_string

      # data - the whole file as a String; copied to binary when needed.
      def initialize(data)
        @data = (data.encoding == Encoding::BINARY) ? data : data.b
        raise InvalidFormatError, "file too small to be an OLE2 file" if @data.bytesize < 512

        @header = Header.new(@data.byteslice(0, 512))
        @fat = Fat.new(@data, @header)
        @directory = Directory.new(@fat.read_chain(@header.first_dir_sector))
        @root = @directory.root
        @mini_stream = load_mini_stream
        @minifat = load_minifat
      end

      # Path components separated by "/", matched case-insensitively.
      # Returns nil as soon as a component is not found.
      def entry(path)
        path.split("/").inject(@root) do |node, part|
          found = node.children[part.upcase]
          return nil unless found

          found
        end
      end

      # Bytes of the stream at path; nil when the path is missing or does
      # not name a stream.
      def stream(path)
        found = entry(path)
        return nil unless found&.stream?

        read_stream(found)
      end

      # Bytes of the given stream entry, cut to its declared size.
      def read_stream(entry)
        declared = entry.size
        return "".b if declared.zero?
        if declared > @data.bytesize
          raise CorruptFileError, "stream entry size #{declared} exceeds file size"
        end
        return read_mini_stream(entry) if declared < @header.mini_stream_cutoff

        raw = @fat.read_chain(entry.start_sector)
        if raw.bytesize < declared
          raise CorruptFileError, "FAT stream chain shorter than declared size"
        end

        raw.byteslice(0, declared)
      end

      private

      # The ministream is the root entry's own stream, read through the FAT.
      def load_mini_stream
        declared = @root.size
        return "".b if declared.zero?
        if declared > @data.bytesize
          raise CorruptFileError, "root ministream size #{declared} exceeds file size"
        end

        raw = @fat.read_chain(@root.start_sector)
        if raw.bytesize < declared
          raise CorruptFileError, "root ministream chain shorter than declared size"
        end

        raw.byteslice(0, declared)
      end

      # The miniFAT as a flat array of u32 entries; empty when the header
      # declares no miniFAT sectors.
      def load_minifat
        return [] unless @header.num_minifat_sectors.positive?
        return [] if @header.first_minifat_sector == ENDOFCHAIN

        @fat.read_chain(@header.first_minifat_sector).unpack("V*")
      end

      # Follows the miniFAT chain from the entry's start sector, collecting
      # mini sectors out of the ministream.
      def read_mini_stream(entry)
        width = @header.mini_sector_size
        collected = String.new(encoding: Encoding::BINARY)
        visited = {}
        current = entry.start_sector
        until current == ENDOFCHAIN
          if visited.key?(current) || current >= @minifat.size
            raise CorruptFileError, "broken miniFAT chain at #{current}"
          end

          visited[current] = true
          piece = @mini_stream.byteslice(current * width, width)
          raise CorruptFileError, "mini-stream sector #{current} out of range" if piece.nil?

          collected << piece
          current = @minifat[current]
        end
        collected.byteslice(0, entry.size)
      end
    end
  end
end

data/lib/mpp_reader/cfbf/header.rb ADDED Viewed

@@ -0,0 +1,40 @@
module MppReader
  # Reader for the OLE2 / Compound File Binary Format ([MS-CFB]) used as the
  # container of .mpp files. Vendored from msg-extractor-ruby (same author).
  module Cfbf
    # Special sector / directory index values.
    FREESECT   = 0xFFFFFFFF
    ENDOFCHAIN = 0xFFFFFFFE
    FATSECT    = 0xFFFFFFFD
    DIFSECT    = 0xFFFFFFFC
    NOSTREAM   = 0xFFFFFFFF

    # The first 512 bytes of a compound file: signature, sector geometry,
    # the start sectors of the directory / miniFAT / DIFAT chains, and the
    # first 109 DIFAT entries.
    class Header
      SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b.freeze

      attr_reader :sector_size, :mini_sector_size, :num_fat_sectors,
                  :first_dir_sector, :mini_stream_cutoff,
                  :first_minifat_sector, :num_minifat_sectors,
                  :first_difat_sector, :num_difat_sectors, :difat_head

      # bytes - at least the first 512 bytes of the file.
      #
      # Raises InvalidFormatError on a missing/short buffer, a wrong
      # signature, or a sector / mini-sector shift other than 9|12 / 6.
      def initialize(bytes)
        if bytes.nil? || bytes.bytesize < 512 || bytes.byteslice(0, 8) != SIGNATURE
          raise InvalidFormatError, "not an OLE2 compound file (bad signature)"
        end

        # Two consecutive u16 values at offsets 30 and 32.
        sector_shift, mini_shift = bytes.byteslice(30, 4).unpack("vv")
        unless [9, 12].include?(sector_shift)
          raise InvalidFormatError, "invalid sector shift #{sector_shift} (must be 9 or 12)"
        end
        if mini_shift != 6
          raise InvalidFormatError, "invalid mini sector shift #{mini_shift} (must be 6)"
        end

        @sector_size = 2**sector_shift
        @mini_sector_size = 2**mini_shift

        # Eight u32 fields starting at offset 44; index 2 is not kept.
        fields = bytes.byteslice(44, 32).unpack("V8")
        @num_fat_sectors = fields[0]
        @first_dir_sector = fields[1]
        @mini_stream_cutoff = fields[3]
        @first_minifat_sector = fields[4]
        @num_minifat_sectors = fields[5]
        @first_difat_sector = fields[6]
        @num_difat_sectors = fields[7]

        @difat_head = bytes.byteslice(76, 436).unpack("V109")
      end
    end
  end
end