mpp_reader 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ require "optparse"
2
+ require "json"
3
+
4
module MppReader
  # Command-line front end. Parses arguments, opens every file named on the
  # command line with MppReader.open, and prints either an indented task tree
  # or one JSON object per file.
  class CLI
    # Runs the command line.
    #
    # @param argv [Array<String>] command-line arguments; not modified
    # @param stdout [#puts] receives normal output (tree, JSON, version, help)
    # @param stderr [#puts] receives usage and per-file error messages
    # @return [Integer] exit status: 0 on success (including --version and
    #   --help), 1 if any file raised MppReader::Error or SystemCallError,
    #   2 on a usage error (bad option or no files)
    def self.run(argv, stdout: $stdout, stderr: $stderr)
      options = { json: false }
      parser = OptionParser.new do |opts|
        opts.banner = "Usage: mpp_reader FILE... [options]"
        opts.on("--json", "Print one JSON object per file to stdout instead of a task tree") do
          options[:json] = true
        end
        opts.on("--version", "Print version") do
          stdout.puts MppReader::VERSION
          # `return` inside this block exits `run` itself.
          return 0
        end
        # Without an explicit handler OptionParser's built-in --help prints to
        # the global $stdout and calls `exit`, bypassing the injected stream
        # and the returned status. Handle it the same way as --version.
        opts.on("-h", "--help", "Print this help") do
          stdout.puts opts.help
          return 0
        end
      end

      begin
        # parse (not parse!) leaves argv untouched and returns the non-option
        # arguments, which are the files to read.
        files = parser.parse(argv)
      rescue OptionParser::ParseError => e
        stderr.puts e.message
        stderr.puts parser.banner
        return 2
      end
      if files.empty?
        stderr.puts parser.banner
        return 2
      end

      status = 0
      files.each do |file|
        project = MppReader.open(file)
        if options[:json]
          stdout.puts JSON.generate(json_for(project, file))
        else
          print_tree(project, file, stdout)
        end
      rescue MppReader::Error, SystemCallError => e
        # A failing file is reported and remembered, but the remaining files
        # are still processed.
        stderr.puts "#{file}: #{e.message}"
        status = 1
      end
      status
    end

    # Writes a human-readable summary of the project: a header line, every
    # task indented by its outline level (with dates, duration, percent
    # complete, milestone/inactive markers and quoted notes), then the
    # resources.
    #
    # @param project [Object] project as returned by MppReader.open
    # @param file [String] file name shown in the header line
    # @param stdout [#puts] output stream
    def self.print_tree(project, file, stdout)
      stdout.puts "#{file} (#{project.file_format}, #{project.application_name})"
      stdout.puts "\nTasks (#{project.tasks.size}):"
      project.tasks.each do |t|
        indent = " " * (t.outline_level || 0)
        dates = [t.start&.strftime("%Y-%m-%d"), t.finish&.strftime("%Y-%m-%d")].compact.join(" .. ")
        extras = []
        extras << "#{t.duration.value.round(2)} #{t.duration.units}" if t.duration
        extras << "#{t.percent_complete}%" if t.percent_complete&.positive?
        extras << "milestone" if t.milestone?
        extras << "inactive" unless t.active
        # rstrip removes the trailing separators left when dates/extras are empty.
        line = "#{indent}[#{t.unique_id}] #{t.name} #{dates} #{extras.join(', ')}".rstrip
        stdout.puts line
        # Each line of the notes is prefixed so multi-line notes stay aligned
        # under their task.
        stdout.puts "#{indent} > #{t.notes.gsub("\n", "\n#{indent} > ")}" if t.notes
      end
      stdout.puts "\nResources (#{project.resources.size}):"
      project.resources.each { |r| stdout.puts " [#{r.unique_id}] #{r.name}" }
    end

    # Builds the Hash that is serialized for --json. Per-entity hashes are
    # compacted, so attributes whose value is nil are omitted.
    #
    # @param project [Object] project as returned by MppReader.open
    # @param file [String] file name stored under :file
    # @return [Hash] keys :file, :format, :application, :tasks, :resources,
    #   :assignments, :calendars
    def self.json_for(project, file)
      {
        file: file,
        format: project.file_format,
        application: project.application_name,
        tasks: project.tasks.map do |t|
          {
            unique_id: t.unique_id, id: t.id, name: t.name,
            outline_level: t.outline_level,
            start: iso(t.start), finish: iso(t.finish),
            duration: duration(t.duration),
            percent_complete: t.percent_complete,
            milestone: t.milestone?, summary: t.summary?,
            active: t.active, manually_scheduled: t.manual,
            parent_unique_id: t.parent&.unique_id,
            predecessors: t.predecessors.map do |r|
              { predecessor_task_unique_id: r.predecessor_task_unique_id,
                type: r.type, lag: duration(r.lag) }
            end,
            notes: t.notes
          }.compact
        end,
        resources: project.resources.map do |r|
          { unique_id: r.unique_id, id: r.id, name: r.name,
            calendar_unique_id: r.calendar_unique_id, notes: r.notes }.compact
        end,
        assignments: project.assignments.map do |a|
          { unique_id: a.unique_id, task_unique_id: a.task_unique_id,
            resource_unique_id: a.resource_unique_id,
            start: iso(a.start), finish: iso(a.finish),
            units: a.units, work: duration(a.work), notes: a.notes }.compact
        end,
        calendars: project.calendars.map do |c|
          { unique_id: c.unique_id, name: c.name,
            base_calendar_unique_id: c.base_calendar_unique_id,
            resource_unique_id: c.resource_unique_id,
            days: c.days, exceptions: c.exceptions }.compact
        end
      }
    end

    # Formats a time as "YYYY-MM-DDTHH:MM:SS" (no zone suffix); nil stays nil.
    def self.iso(time) = time&.strftime("%Y-%m-%dT%H:%M:%S")

    # Converts a duration object into a { value:, units: } Hash; nil/false
    # are passed through unchanged.
    def self.duration(d) = d && { value: d.value, units: d.units }
  end
end
@@ -0,0 +1,37 @@
1
module MppReader
  # Reader for the "\x01CompObj" stream of an OLE compound file, which names
  # the application that wrote the file and the file format. Layout (ported
  # from MPXJ CompObj): a 28-byte header followed by three strings, each
  # stored as a little-endian u32 length and then that many bytes of
  # NUL-terminated ANSI text: application name, file format, application ID.
  # "Microsoft Project 4.0" files carry only the application name, so the
  # other two values are filled in with fixed strings.
  class CompObj
    attr_reader :application_name, :file_format, :application_id

    # @param data [String] raw bytes of the CompObj stream
    def initialize(data)
      @data = data
      @pos = 28 # the strings start right after the header
      @application_name = read_string
      @file_format, @application_id =
        if @application_name == "Microsoft Project 4.0"
          ["MSProject.MPP4", "MSProject.Project.4"]
        else
          # Evaluated left to right: file format first, then application ID.
          [read_string, read_string]
        end
    end

    private

    # Reads the next length-prefixed string and advances the cursor.
    # Returns nil when the length prefix or the announced payload does not
    # fit in the remaining data, or when the announced length is zero.
    def read_string
      prefix_end = @pos + 4
      return nil if prefix_end > @data.bytesize

      size = @data.byteslice(@pos, 4).unpack1("V")
      @pos = prefix_end
      return nil unless size.positive? && @pos + size <= @data.bytesize

      # The stored length includes the trailing NUL, which is not returned.
      text = @data.byteslice(@pos, size - 1)
      @pos += size
      text
    end
  end
end
@@ -0,0 +1,105 @@
1
+ require "date"
2
+
3
module MppReader
  # A duration as MS Project models it: a magnitude in scheduling units
  # (working time), not an absolute time span.
  Duration = Struct.new(:value, :units)

  # Decoders for MS Project's binary value encodings, ported from MPXJ
  # MPPUtility. The epoch is 1983-12-31; dates are u16 days since the epoch;
  # times of day and durations are stored in tenths of a minute (6-second
  # units); timestamps combine a u16 time in those units with a u16 day count.
  module Decode
    EPOCH_DATE = Date.new(1983, 12, 31)

    # The unit code lives in the low five bits of a duration "type" value.
    DURATION_UNITS_MASK = 0x1F
    DURATION_UNITS = {
      3 => :minutes,
      4 => :elapsed_minutes,
      5 => :hours,
      6 => :elapsed_hours,
      7 => :days,
      8 => :elapsed_days,
      9 => :weeks,
      10 => :elapsed_weeks,
      11 => :months,
      12 => :elapsed_months,
      19 => :percent,
      20 => :elapsed_percent
    }.freeze

    # Divisors converting tenths-of-minutes to each unit, assuming MS
    # Project's defaults (8h days, 40h weeks, 20-day months).
    TENTHS_PER_UNIT = {
      minutes: 10, elapsed_minutes: 10,
      hours: 600, elapsed_hours: 600,
      days: 4800, elapsed_days: 14_400,
      weeks: 24_000, elapsed_weeks: 100_800,
      months: 96_000, elapsed_months: 432_000
    }.freeze

    module_function

    # Decodes a u16 day count at +offset+ into a Date.
    #
    # @param data [String] binary record
    # @param offset [Integer] byte offset of the value
    # @return [Date, nil] nil when the bytes are missing or the value is the
    #   0xFFFF sentinel
    def date(data, offset)
      days = data.byteslice(offset, 2).to_s.unpack1("v")
      return nil if days.nil? || days == 0xFFFF

      EPOCH_DATE + days
    end

    # Decodes a 4-byte timestamp: u16 time of day (6-second units) at
    # +offset+, u16 day count at +offset+ + 2.
    #
    # @return [Time, nil] local wall-clock time, or nil for missing data,
    #   the 0xFFFF day sentinel, day counts <= 1, and values matching the
    #   NA heuristic below
    def timestamp(data, offset)
      days = data.byteslice(offset + 2, 2).to_s.unpack1("v")
      return nil if days.nil? || days <= 1 || days == 0xFFFF

      time = data.byteslice(offset, 2).unpack1("v")
      time = 0 if time == 0xFFFF
      seconds = time * 6
      # Very small day counts show as NA in MS Project; a non-zero seconds
      # component distinguishes NA from real values (MPXJ heuristic).
      return nil if days < 100 && (seconds % 60) != 0

      to_time(EPOCH_DATE + days, seconds)
    end

    # Decodes a u32 count of 6-second units since the epoch into a Time.
    #
    # @return [Time, nil] nil when fewer than four bytes are available
    def timestamp_from_tenths(data, offset)
      tenths = data.byteslice(offset, 4).to_s.unpack1("V")
      return nil if tenths.nil?

      seconds = tenths * 6
      to_time(EPOCH_DATE + (seconds / 86_400), seconds % 86_400)
    end

    # Maps a duration type value to a unit symbol.
    #
    # @param type [Integer] raw type; only the low five bits are inspected
    # @param default [Symbol] returned for code 21
    # @return [Symbol] the unit; unrecognized codes map to :days
    def duration_units(type, default: :days)
      code = type & DURATION_UNITS_MASK
      return default if code == 21

      DURATION_UNITS.fetch(code, :days)
    end

    # value is in tenths of a minute; converts using MS Project's default
    # hours-per-day assumptions. Units without a divisor (e.g. :percent) are
    # returned unscaled.
    def duration(value, units)
      Duration.new(value.to_f / TENTHS_PER_UNIT.fetch(units, 1), units)
    end

    # Like duration, but day/week/month conversions honour the project's
    # configured working time instead of the defaults. Other units, and a
    # zero divisor, fall back to duration.
    def adjusted_duration(value, units, minutes_per_day:, minutes_per_week:, days_per_month:)
      tenths_per_unit =
        case units
        when :days then minutes_per_day * 10
        when :weeks then minutes_per_week * 10
        when :months then minutes_per_day * days_per_month * 10
        end
      if tenths_per_unit.nil? || tenths_per_unit.zero?
        duration(value, units)
      else
        Duration.new(value.to_f / tenths_per_unit, units)
      end
    end

    # Combines a date and a seconds-of-day count into a local Time.
    #
    # The time is built from wall-clock components rather than by adding
    # elapsed seconds to local midnight: on a day with a DST transition,
    # "midnight + 8h" is 09:00 or 07:00 on the clock, not the 08:00 the file
    # encodes. Counts of a day or more roll over into the following days.
    #
    # @param date [Date] calendar day
    # @param seconds_of_day [Integer] seconds past midnight
    # @return [Time] local time with the encoded wall-clock reading
    def to_time(date, seconds_of_day)
      extra_days, seconds = seconds_of_day.divmod(86_400)
      date += extra_days
      hours, seconds = seconds.divmod(3600)
      minutes, seconds = seconds.divmod(60)
      Time.new(date.year, date.month, date.day, hours, minutes, seconds)
    end
  end
end
@@ -0,0 +1,13 @@
1
module MppReader
  # Base class of the error classes defined below; rescuing it also rescues
  # each of its subclasses.
  class Error < StandardError
  end

  # The input is not an OLE file, or it is an OLE file that is not an
  # MS Project file.
  class InvalidFormatError < Error
  end

  # The input is a recognized MS Project file, but in a format this gem does
  # not support (e.g. MPP8/MPP9/MPP12, or a password-protected file).
  class UnsupportedFormatError < Error
  end

  # The file is structurally broken (bad FAT chains, truncated streams, bad
  # records).
  class CorruptFileError < Error
  end
end
@@ -0,0 +1,106 @@
1
+ require_relative "field_tables"
2
+
3
module MppReader
  # Maps logical fields (task name, start, ...) to where their values live in
  # an entity's data blocks. The definition is stored in the project Props
  # under entity-specific keys, as 28-byte entries (ported from MPXJ
  # FieldMap.createFieldMap): mask u32@0, fixed-data offset u16@4, field type
  # u32@12, category u16@20.
  #
  # Locations: :fixed_data (offset into a FixedData record; a second fixed
  # block is signalled by offsets restarting), :var_data (key into Var2Data;
  # for MPP14 the key is the low word of the field type), :meta_data (bit
  # mask over a FixedMeta record).
  class FieldMap
    Item = Struct.new(:location, :data_block, :fixed_offset, :var_key, :mask,
                      :meta_block, :data_type, :units_field)

    # Bytes each data type occupies in a fixed-data record (MPXJ
    # FieldMap#getFixedDataFieldSize); used to size-check records. Types not
    # listed here count as zero bytes.
    FIXED_DATA_SIZES = Hash.new(0).update(
      date: 4,
      integer: 4,
      duration: 4,
      time_units: 2,
      constraint: 2,
      priority: 2,
      percentage: 2,
      task_type: 2,
      accrue: 2,
      short: 2,
      boolean: 2,
      delay: 2,
      workgroup: 2,
      rate_units: 2,
      earned_value_method: 2,
      resource_request_type: 2,
      currency: 8,
      units: 8,
      rate: 8,
      work: 8,
      work_units: 1,
      guid: 16
    ).freeze

    # Per entity: [field-type base, field table, field-type table, Props keys
    # that may hold the field map].
    ENTITIES = {
      task: [FieldTables::TASK_FIELD_BASE, FieldTables::TASK_FIELDS,
             FieldTables::TASK_FIELD_TYPES, [131_092, 50_331_668]],
      resource: [FieldTables::RESOURCE_FIELD_BASE, FieldTables::RESOURCE_FIELDS,
                 FieldTables::RESOURCE_FIELD_TYPES, [131_093, 50_331_669]],
      assignment: [FieldTables::ASSIGNMENT_FIELD_BASE, FieldTables::ASSIGNMENT_FIELDS,
                   FieldTables::ASSIGNMENT_FIELD_TYPES, [131_095, 50_331_671]]
    }.freeze

    # Builds the task field map from the project Props.
    def self.for_tasks(props)
      from_props(props, :task)
    end

    # Builds the resource field map from the project Props.
    def self.for_resources(props)
      from_props(props, :resource)
    end

    # Builds the assignment field map from the project Props.
    def self.for_assignments(props)
      from_props(props, :assignment)
    end

    # Looks the entity's field-map definition up under each of its Props keys
    # and parses the first one that is present.
    #
    # @param props [#[]] project Props, indexed by integer key
    # @param entity [Symbol] :task, :resource or :assignment
    # @raise [UnsupportedFormatError] when none of the keys holds a value
    def self.from_props(props, entity)
      keys = ENTITIES.fetch(entity)[3]
      data = keys.map { |key| props[key] }.find(&:itself)
      return new(data, entity: entity) unless data.nil?

      raise UnsupportedFormatError,
            "no #{entity} field map in project Props - default field maps are not implemented"
    end

    # Parses the 28-byte entries of +data+. Entries whose field type does not
    # belong to the entity, or is not in its field table, are skipped; a
    # trailing partial entry is ignored.
    #
    # @param data [String] raw field-map definition
    # @param entity [Symbol] :task, :resource or :assignment
    def initialize(data, entity:)
      field_base, field_names, field_types, = ENTITIES.fetch(entity)
      @map = {}
      @max_fixed_data_size = Hash.new(0)
      previous_offset = 0
      block_index = 0

      (data.bytesize / 28).times do |entry_index|
        entry = data.byteslice(entry_index * 28, 28)
        # mask u32@0, fixed offset u16@4, field type u32@12, category u16@20
        mask, fixed_offset, type_value, category = entry.unpack("Vvx6Vx4v")

        next if (type_value & 0xFFFF0000) != field_base

        # NOTE: MPXJ substitutes var data keys for some custom fields
        # (FieldMap14.VAR_DATA_MAP); not implemented yet.
        low_word = type_value & 0xFFFF
        field = field_names[low_word]
        next if field.nil?

        item =
          if category == 0x0B
            Item.new(:meta_data, 0, nil, nil, mask, 0)
          elsif category == 0x64
            Item.new(:meta_data, 0, nil, nil, mask, 1)
          elsif fixed_offset != 0xFFFF
            # Offsets restarting at a lower value mark the next fixed block.
            block_index += 1 if fixed_offset < previous_offset
            previous_offset = fixed_offset
            Item.new(:fixed_data, block_index, fixed_offset, nil, mask, 0)
          elsif low_word != 0
            Item.new(:var_data, 0, nil, low_word, mask, 0)
          end
        next if item.nil?

        item.data_type, item.units_field = field_types[field]
        if item.location == :fixed_data
          extent = item.fixed_offset + FIXED_DATA_SIZES[item.data_type]
          @max_fixed_data_size[item.data_block] = extent if extent > @max_fixed_data_size[item.data_block]
        end
        @map[field] = item
      end
    end

    # @return [Item, nil] where +field+ is stored, or nil if it is not mapped
    def [](field)
      @map[field]
    end

    # @return [Integer] largest end offset of any field mapped into fixed-data
    #   block +block+; 0 when the block has no fields
    def max_fixed_data_size(block)
      @max_fixed_data_size[block]
    end
  end
end
@@ -0,0 +1,119 @@
1
module MppReader
  # Decodes a single field value given its FieldMap::Item, the entity's
  # fixed-data records and the var data block. Ported from MPXJ
  # FieldMap.FieldItem#read; only the data types needed so far are
  # implemented - unknown types decode to nil rather than raising.
  class FieldReader
    MINUTES_PER_DAY_KEY = 37_748_765
    MINUTES_PER_WEEK_KEY = 37_748_766
    DAYS_PER_MONTH_KEY = 37_753_743

    # Reads the project's working-time settings from +props+, substituting
    # 480 minutes/day, 2400 minutes/week and 20 days/month when a value is
    # missing or not positive.
    #
    # @param props [#int, #short] project Props
    def initialize(props)
      @minutes_per_day = positive_or(props.int(MINUTES_PER_DAY_KEY), 480)
      @minutes_per_week = positive_or(props.int(MINUTES_PER_WEEK_KEY), 2400)
      @days_per_month = positive_or(props.short(DAYS_PER_MONTH_KEY), 20)
    end

    # Reads +field+ for the entity identified by +unique_id+.
    #
    # fixed_records is an array of byte strings, one per fixed-data block
    # (block 0 = FixedData, block 1 = Fixed2Data).
    #
    # @return [Object, nil] the decoded value; nil when the field is not
    #   mapped, its fixed record is absent or too short, its location is
    #   neither :fixed_data nor :var_data, or its data type is not handled
    def read(field_map, field, unique_id, fixed_records, var_data)
      item = field_map[field]
      return nil if item.nil?

      location = item.location
      if location == :fixed_data
        record = fixed_records[item.data_block]
        return nil if record.nil?
        return nil unless item.fixed_offset < record.bytesize

        read_fixed(field_map, item, record, unique_id, fixed_records, var_data)
      elsif location == :var_data
        read_var(field_map, item, unique_id, fixed_records, var_data)
      end
    end

    # Converts a raw duration value (tenths of a minute) using the
    # project's configured working time.
    def adjusted_duration(value, units)
      Decode.adjusted_duration(
        value, units,
        minutes_per_day: @minutes_per_day,
        minutes_per_week: @minutes_per_week,
        days_per_month: @days_per_month
      )
    end

    private

    # Returns +value+ when it is present and positive, otherwise +default+.
    def positive_or(value, default)
      return default unless value

      value.positive? ? value : default
    end

    # Decodes a value stored at the item's offset inside a fixed-data record.
    def read_fixed(field_map, item, record, unique_id, fixed_records, var_data)
      at = item.fixed_offset
      case item.data_type
      when :date then Decode.timestamp(record, at)
      when :integer then int32(record, at)
      when :duration
        units = duration_units_of(field_map, item, unique_id, fixed_records, var_data)
        raw = int32(record, at)
        # -1 marks "no value".
        adjusted_duration(raw, units) unless raw.nil? || raw == -1
      when :time_units then Decode.duration_units(u16(record, at).to_i)
      when :percentage
        percent = u16(record, at)
        percent if percent && percent <= 100
      when :short, :constraint, :priority, :task_type then u16(record, at)
      when :boolean then !u16(record, at).to_i.zero?
      when :currency, :units
        # Stored in hundredths; magnitudes below 0.1 (and missing data) read as 0.0.
        amount = float64(record, at)
        if amount && amount.abs >= 0.1
          amount / 100
        else
          0.0
        end
      when :work
        # Divided by 60_000 to give hours; magnitudes below 1000 read as 0.0.
        amount = float64(record, at)
        hours = amount && amount.abs >= 1000 ? amount / 60_000 : 0.0
        Duration.new(hours, :hours)
      end
    end

    # Decodes a value stored in the var data block under the item's key.
    def read_var(field_map, item, unique_id, fixed_records, var_data)
      key = item.var_key
      # Fetches the raw bytes and unpacks one value; nil when there are none.
      unpacked = ->(directive) { var_data.bytes(unique_id, key)&.unpack1(directive) }

      case item.data_type
      when :string then var_data.string(unique_id, key)
      when :notes then RtfText.strip(var_data.byte_string(unique_id, key))
      when :date
        raw = var_data.bytes(unique_id, key)
        raw && Decode.timestamp(raw, 0)
      when :integer then unpacked.call("l<")
      when :numeric then unpacked.call("E")
      when :currency
        amount = unpacked.call("E")
        amount / 100 if amount && amount.abs >= 0.1
      when :work
        amount = unpacked.call("E")
        amount && Duration.new(amount.abs < 1000 ? 0.0 : amount / 60_000, :hours)
      when :duration
        units = duration_units_of(field_map, item, unique_id, fixed_records, var_data)
        raw = unpacked.call("l<")
        raw && adjusted_duration(raw, units)
      when :percentage, :short then unpacked.call("v")
      when :boolean then !unpacked.call("v").to_i.zero?
      end
    end

    # Resolves the units of a duration by reading the item's companion units
    # field; :days when there is no such field or it does not decode to a
    # Symbol.
    def duration_units_of(field_map, item, unique_id, fixed_records, var_data)
      return :days unless item.units_field

      units = read(field_map, item.units_field, unique_id, fixed_records, var_data)
      units.is_a?(Symbol) ? units : :days
    end

    # Little-endian u16 at +offset+; nil when fewer than two bytes remain.
    def u16(record, offset)
      record.byteslice(offset, 2).to_s.unpack1("v")
    end

    # Little-endian signed 32-bit integer at +offset+; nil when truncated.
    def int32(record, offset)
      record.byteslice(offset, 4).to_s.unpack1("l<")
    end

    # Little-endian double at +offset+; nil when truncated.
    def float64(record, offset)
      record.byteslice(offset, 8).to_s.unpack1("E")
    end
  end
end