bookie_accounting 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/.gitignore +19 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +22 -0
  4. data/README.md +29 -0
  5. data/Rakefile +15 -0
  6. data/bin/bookie-create-tables +52 -0
  7. data/bin/bookie-data +102 -0
  8. data/bin/bookie-send +110 -0
  9. data/bookie_accounting.gemspec +28 -0
  10. data/lib/bookie.rb +11 -0
  11. data/lib/bookie/config.rb +101 -0
  12. data/lib/bookie/database.rb +656 -0
  13. data/lib/bookie/formatter.rb +149 -0
  14. data/lib/bookie/formatters/comma_dump.rb +24 -0
  15. data/lib/bookie/formatters/spreadsheet.rb +45 -0
  16. data/lib/bookie/formatters/stdout.rb +32 -0
  17. data/lib/bookie/sender.rb +108 -0
  18. data/lib/bookie/senders/standalone.rb +37 -0
  19. data/lib/bookie/senders/torque_cluster.rb +166 -0
  20. data/lib/bookie/version.rb +4 -0
  21. data/snapshot/config.json +12 -0
  22. data/snapshot/default.json +11 -0
  23. data/snapshot/pacct +0 -0
  24. data/snapshot/pacct_large +0 -0
  25. data/snapshot/pacct_test_config.json +14 -0
  26. data/snapshot/test_config.json +13 -0
  27. data/snapshot/torque +3 -0
  28. data/snapshot/torque_invalid_lines +5 -0
  29. data/snapshot/torque_invalid_lines_2 +4 -0
  30. data/snapshot/torque_invalid_lines_3 +3 -0
  31. data/snapshot/torque_large +100 -0
  32. data/spec/comma_dump_formatter_spec.rb +56 -0
  33. data/spec/config_spec.rb +55 -0
  34. data/spec/database_spec.rb +625 -0
  35. data/spec/formatter_spec.rb +93 -0
  36. data/spec/sender_spec.rb +104 -0
  37. data/spec/spec_helper.rb +121 -0
  38. data/spec/spreadsheet_formatter_spec.rb +112 -0
  39. data/spec/standalone_sender_spec.rb +40 -0
  40. data/spec/stdout_formatter_spec.rb +66 -0
  41. data/spec/torque_cluster_sender_spec.rb +111 -0
  42. metadata +227 -0
@@ -0,0 +1,149 @@
1
+ require 'bookie/database'
2
+
3
+ require 'date'
4
+ require 'spreadsheet'
5
+
6
module Bookie
  ##
  #Takes jobs from the database and creates summaries and tables in various output formats.
  #
  #The output format is chosen when the object is created: the matching module under
  #Bookie::Formatters is mixed into the instance and is expected to supply <tt>open</tt>,
  #<tt>do_print_summary</tt>, and <tt>do_print_jobs</tt> (plus, optionally, <tt>do_flush</tt>).
  class Formatter
    ##
    #Creates a new Formatter object
    #
    #<tt>type</tt> should be a symbol (or string) that maps to one of the files in <tt>bookie/formatters</tt>.
    #
    #<tt>filename</tt> is handed to the plugin's <tt>open</tt> method.
    #
    #===Examples
    #  #Uses the spreadsheet formatter from 'bookie/formatters/spreadsheet'
    #  formatter = Bookie::Formatter.new(:spreadsheet, 'report.xls')
    def initialize(type, filename = nil)
      #Needed for symbol arguments
      type = type.to_s
      #Plugins are loaded lazily so that only the requested format's dependencies are needed.
      require "bookie/formatters/#{type}"
      extend Bookie::Formatters.const_get(type.camelize)
      self.open(filename)
    end

    ##
    #An array containing the labels for each field in a summary
    #
    #The order matches the order of the values built by print_summary.
    SUMMARY_FIELD_LABELS = [
      "Number of jobs",
      "Total wall time",
      "Total CPU time",
      "Successful",
      "Available CPU time",
      "CPU time used",
      "Available memory (average)",
      "Memory used (average)",
    ].freeze

    ##
    #An array containing the labels for each field in a details table
    #
    #The order matches the order of the values yielded by fields_for_each_job.
    DETAILS_FIELD_LABELS = [
      'User', 'Group', 'System', 'System type', 'Start time', 'End time', 'Wall time',
      'CPU time', 'Memory usage', 'Exit code'
    ].freeze

    ##
    #Prints a summary of <tt>jobs</tt> and <tt>systems</tt> using the formatter plugin
    #
    #Use start_time and end_time to filter the jobs by a time range.
    #
    #It is probably not a good idea to apply any time-based filters to <tt>jobs</tt> or <tt>systems</tt> beforehand.
    #
    #Both <tt>jobs</tt> and <tt>systems</tt> should be either models or ActiveRecord::Relation objects.
    #
    #Returns the summaries for <tt>jobs</tt> and <tt>systems</tt>
    def print_summary(jobs, systems, start_time = nil, end_time = nil)
      jobs_summary = jobs.summary(start_time, end_time)
      systems_summary = systems.summary(start_time, end_time)
      cpu_time = jobs_summary[:cpu_time]
      avail_cpu_time = systems_summary[:avail_cpu_time]
      memory_time = jobs_summary[:memory_time]
      avail_memory_time = systems_summary[:avail_memory_time]
      #The order of these values must match SUMMARY_FIELD_LABELS.
      field_values = [
        jobs_summary[:jobs].length,
        Formatter.format_duration(jobs_summary[:wall_time]),
        Formatter.format_duration(cpu_time),
        '%.4f%%' % (jobs_summary[:successful] * 100),
        Formatter.format_duration(avail_cpu_time),
        percent_of(cpu_time, avail_cpu_time),
        "#{Integer(systems_summary[:avail_memory_avg])} kb",
        percent_of(memory_time, avail_memory_time)
      ]
      do_print_summary(field_values)
      return jobs_summary, systems_summary
    end

    ##
    #Prints a table containing all details of <tt>jobs</tt>
    #
    #<tt>jobs</tt> should be an array.
    def print_jobs(jobs)
      do_print_jobs(jobs)
    end

    ##
    #Flushes all output
    #
    #Should always be called after the desired information has been written.
    #Does nothing if the formatter plugin does not define <tt>do_flush</tt>.
    def flush()
      do_flush() if self.respond_to?(:do_flush)
    end

    ##
    #For each job, yields an array containing the field values to be used when printing a table of jobs
    #
    #call-seq:
    #  fields_for_each_job(jobs) { |fields| ... }
    #
    #<tt>jobs</tt> should be an array of Bookie::Database::Job objects.
    #
    #===Examples
    #  formatter.fields_for_each_job(jobs) do |fields|
    #    Bookie::Formatter::DETAILS_FIELD_LABELS.zip(fields) do |label, field|
    #      puts "#{label}: #{field}"
    #    end
    #  end
    def fields_for_each_job(jobs)
      time_format = '%Y-%m-%d %H:%M:%S'
      jobs.each do |job|
        #The memory figure is annotated with how it was measured, unless that is unknown.
        memory_stat_type = job.system.system_type.memory_stat_type
        if memory_stat_type == :unknown
          memory_stat_type = ''
        else
          memory_stat_type = " (#{memory_stat_type})"
        end
        #The order of these values must match DETAILS_FIELD_LABELS.
        yield [
          job.user.name,
          job.user.group.name,
          job.system.name,
          job.system.system_type.name,
          job.start_time.getlocal.strftime(time_format),
          job.end_time.getlocal.strftime(time_format),
          Formatter.format_duration(job.end_time - job.start_time),
          Formatter.format_duration(job.cpu_time),
          "#{job.memory}kb#{memory_stat_type}",
          job.exit_code
        ]
      end
    end
    protected :fields_for_each_job

    ##
    #Formats <tt>used</tt> as a percentage of <tt>available</tt> with four decimal places
    #
    #Returns '0.0000%' when <tt>available</tt> is zero so that no division by zero occurs.
    def percent_of(used, available)
      return '0.0000%' if available == 0
      '%.4f%%' % (Float(used) / available * 100)
    end
    private :percent_of

    ##
    #Formats a duration in HH:MM:SS format
    #
    #<tt>dur</tt> should be a number in seconds; it is converted with Integer(), so any fractional part is dropped.
    #The hours field is padded to at least two digits and grows as needed.
    def self.format_duration(dur)
      hours, remainder = Integer(dur).divmod(3600)
      minutes, seconds = remainder.divmod(60)
      return "#{hours.to_s.rjust(2, '0')}:#{minutes.to_s.rjust(2, '0')}:#{seconds.to_s.rjust(2, '0')}"
    end
  end

  #Contains all formatter plugins
  module Formatters

  end
end
@@ -0,0 +1,24 @@
1
module Bookie
  module Formatters
    ##
    #Formats data as a CSV file
    #
    #Fields are joined with ", " and written as-is; no quoting or escaping is applied.
    module CommaDump
      ##
      #Opens <tt>filename</tt> for writing; all later output goes to this file.
      #
      #The file is opened in write mode (an existing file is truncated) because the
      #default read-only mode would make every later write raise an IOError.
      def open(filename)
        @file = File.open(filename, 'w')
      end

      ##
      #Writes one "label, value" line for each summary field
      #
      #<tt>field_values</tt> should be in the same order as Formatter::SUMMARY_FIELD_LABELS.
      def do_print_summary(field_values)
        Formatter::SUMMARY_FIELD_LABELS.zip(field_values) do |label, value|
          @file.puts "#{label}, #{value}"
        end
      end

      ##
      #Writes a header line followed by one line per job
      def do_print_jobs(jobs)
        @file.puts Formatter::DETAILS_FIELD_LABELS.join(', ')
        fields_for_each_job(jobs) do |fields|
          @file.puts fields.join(', ')
        end
      end

      ##
      #Pushes buffered output to the file; invoked through Formatter#flush
      def do_flush
        @file.flush
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'spreadsheet'
2
+
3
module Bookie
  module Formatters
    ##
    #Formats data as a Microsoft Excel spreadsheet
    #
    #Nothing is written to disk until do_flush is called; until then all data is
    #accumulated in an in-memory workbook.
    module Spreadsheet
      ##
      #Remembers the output filename and starts a new, empty workbook
      def open(filename)
        @filename = filename
        @workbook = ::Spreadsheet::Workbook.new
      end

      ##
      #Fills the "Summary" worksheet (creating it if needed) with one label/value pair per row
      def do_print_summary(field_values)
        sheet = @workbook.worksheet("Summary") || @workbook.create_worksheet(:name => "Summary")
        sheet.column(0).width = 20

        labels = Formatter::SUMMARY_FIELD_LABELS
        labels.each_index do |row_index|
          row = sheet.row(row_index)
          row[0] = labels[row_index]
          row[1] = field_values[row_index]
        end
      end

      ##
      #Fills the "Details" worksheet (creating it if needed) with a header row and one row per job
      def do_print_jobs(jobs)
        sheet = @workbook.worksheet("Details") || @workbook.create_worksheet(:name => "Details")

        labels = Formatter::DETAILS_FIELD_LABELS
        sheet.row(0).concat(labels)
        labels.each_index { |column_index| sheet.column(column_index).width = 20 }

        #Row 0 holds the header, so job rows start at 1.
        next_row = 1
        fields_for_each_job(jobs) do |fields|
          sheet.row(next_row).concat(fields)
          next_row += 1
        end
      end

      ##
      #Writes the accumulated workbook to the file given to open
      def do_flush()
        @workbook.write(@filename)
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Bookie
  module Formatters
    ##
    #Formats data in a human-readable text format intended to be sent to standard output
    module Stdout
      ##
      #Selects the output stream
      #
      #If <tt>filename</tt> is given, the file is opened for writing (an existing file is
      #truncated); the default read-only mode would make every later write raise an IOError.
      #Otherwise, output goes to STDOUT.
      def open(filename)
        if filename
          @io = File.open(filename, 'w')
        else
          @io = STDOUT
        end
      end

      ##
      #Prints each summary label (padded or truncated to 30 characters) followed by its value
      def do_print_summary(field_values)
        Formatter::SUMMARY_FIELD_LABELS.zip(field_values) do |label, value|
          @io.printf("%-30.30s%s\n", "#{label}:", value)
        end
      end

      ##
      #Prints a fixed-width table: a heading, a dashed rule of the same width, and one line per job
      def do_print_jobs(jobs)
        #To consider: optimize by moving out of the function?
        format_string = "%-15.15s %-15.15s %-20.20s %-20.20s %-26.25s %-26.25s %-12.10s %-12.10s %-20.20s %-11.11s\n"
        heading = sprintf(format_string, *Formatter::DETAILS_FIELD_LABELS)
        @io.write heading
        #The heading ends with a newline, which should not count toward the rule's width.
        @io.puts '-' * (heading.length - 1)
        fields_for_each_job(jobs) do |fields|
          @io.printf(format_string, *fields)
        end
      end

      ##
      #Pushes buffered output to the underlying stream; invoked through Formatter#flush
      def do_flush
        @io.flush
      end
    end
  end
end
@@ -0,0 +1,108 @@
1
+ require 'bookie/database'
2
+
3
+ require 'date'
4
+
5
module Bookie
  ##
  #An object that sends data to the database
  #
  #The system-specific parts (<tt>each_job</tt>, <tt>system_type_name</tt>, and
  #<tt>memory_stat_type</tt>) come from a plugin module under Bookie::Senders that is
  #mixed into the instance when it is created.
  class Sender
    ##
    #Creates a new Sender
    #
    #<tt>config</tt> should be an instance of Bookie::Config.
    #Its <tt>system_type</tt> selects which file under <tt>bookie/senders</tt> is loaded.
    def initialize(config)
      @config = config
      t = @config.system_type
      require "bookie/senders/#{t}"
      extend Bookie::Senders.const_get(t.camelize)
    end

    ##
    #Retrieves the System object with which the jobs will be associated
    #--
    #To consider: caching?
    #++
    def system
      hostname = @config.hostname
      system_type = self.system_type
      Bookie::Database::System.find_active(
        :name => hostname,
        :system_type => system_type,
        :start_time => Time.now,
        :cores => @config.cores,
        :memory => @config.memory
      )
    end

    ##
    #Sends job data from the given file to the database server
    #
    #Raises an IOError if <tt>filename</tt> does not exist and a RuntimeError if the
    #first unfiltered job in the file already has a counterpart (same end time) in the database.
    def send_data(filename)
      #File.exists? was removed in Ruby 3.2; File.exist? is available in every Ruby version.
      raise IOError.new("File '#{filename}' does not exist.") unless File.exist?(filename)

      system = self.system

      #These hashes are handed to find_or_create! for both users and groups on every job.
      known_users = {}
      known_groups = {}

      #Check the first job to see if there are entries in the database for its date from this system.
      each_job(filename) do |job|
        next if filtered?(job)
        end_time = job.start_time + job.wall_time
        duplicate = system.jobs.find_by_end_time(end_time)
        if duplicate
          raise "Jobs already exist in the database for the date #{end_time.strftime('%Y-%m-%d')}."
        end
        #Only the first unfiltered job is examined.
        break
      end

      each_job(filename) do |job|
        next if filtered?(job)
        db_job = job.to_model
        #Determine if the user/group pair must be added to/retrieved from the database.
        user = Bookie::Database::User.find_or_create!(
          job.user_name,
          Bookie::Database::Group.find_or_create!(job.group_name, known_groups),
          known_users)
        db_job.system = system
        db_job.user = user
        db_job.save!
      end
    end

    ##
    #The Bookie::Database::SystemType that systems using this sender will have
    #
    #It is looked up (or created) from the plugin's <tt>system_type_name</tt> and <tt>memory_stat_type</tt>.
    def system_type
      Bookie::Database::SystemType.find_or_create!(system_type_name, memory_stat_type)
    end

    ##
    #Returns whether a job should be filtered from the results
    #
    #A job is filtered when its user name appears in the configuration's <tt>excluded_users</tt>.
    def filtered?(job)
      @config.excluded_users.include?(job.user_name)
    end
  end

  ##
  #This module is mixed into various job classes used internally by senders.
  #
  #The including class must respond to <tt>command_name</tt>, <tt>start_time</tt>,
  #<tt>wall_time</tt>, <tt>cpu_time</tt>, <tt>memory</tt>, and <tt>exit_code</tt>.
  module ModelHelpers
    ##
    #Converts the object to a Bookie::Database::Job
    #
    #The job's end time is computed as <tt>start_time + wall_time</tt>.
    #The returned job is not saved and has no user or system assigned.
    def to_model()
      job = Bookie::Database::Job.new
      job.command_name = self.command_name
      job.start_time = self.start_time
      job.end_time = self.start_time + self.wall_time
      job.wall_time = self.wall_time
      job.cpu_time = self.cpu_time
      job.memory = self.memory
      job.exit_code = self.exit_code
      return job
    end
  end

  #Contains all sender plugins
  module Senders

  end
end
@@ -0,0 +1,37 @@
1
+ require 'fileutils'
2
+ require 'pacct'
3
+
4
module Bookie
  module Senders
    ##
    #Sender plugin for a standalone Linux system; reads jobs from a process accounting log via Pacct::Log
    module Standalone
      ##
      #Opens the accounting log at <tt>filename</tt> and yields each of its entries in turn
      def each_job(filename)
        log = Pacct::Log.new(filename)
        log.each_entry { |entry| yield entry }
      end

      ##
      #The system type name used for systems handled by this plugin
      def system_type_name
        "Standalone"
      end

      ##
      #The memory statistic type reported for this plugin's jobs (<tt>:avg</tt>)
      def memory_stat_type
        :avg
      end
    end
  end
end
28
+
29
##
#Namespace that originates from the <tt>pacct</tt> gem
module Pacct
  ##
  #Class that originates from the <tt>pacct</tt> gem; reopened here so that it includes
  #Bookie::ModelHelpers and therefore gains <tt>to_model</tt>
  class Entry
    include Bookie::ModelHelpers
  end
end
@@ -0,0 +1,166 @@
1
+
2
module Bookie
  module Senders
    ##
    #Sender plugin that reads jobs from a TORQUE cluster log via Torque::JobLog
    module TorqueCluster
      ##
      #Opens the TORQUE log at <tt>filename</tt> and yields each job it reports
      def each_job(filename)
        log = Torque::JobLog.new(filename)
        log.each_job { |entry| yield entry }
      end

      ##
      #The system type name used for systems handled by this plugin
      def system_type_name
        "TORQUE cluster"
      end

      ##
      #The memory statistic type reported for this plugin's jobs (<tt>:max</tt>)
      def memory_stat_type
        :max
      end
    end
  end
end
25
+
26
##
#Contains tools for working with TORQUE data
module Torque
  #Represents a completed job
  class Job
    #The name of the user who created the job
    attr_accessor :user_name
    #The group name of the user who created the job
    attr_accessor :group_name
    #The name of the command
    attr_accessor :command_name
    #The job's start time
    attr_accessor :start_time
    #The job's total wall time
    attr_accessor :wall_time
    #The job's total CPU time
    attr_accessor :cpu_time
    #The job's maximum physical memory usage in kilobytes
    attr_accessor :physical_memory
    #The job's maximum virtual memory usage in kilobytes
    attr_accessor :virtual_memory
    #The job's exit code
    attr_accessor :exit_code

    #Returns the job's total maximum memory usage in kilobytes
    #
    #This is the sum of the physical and virtual memory figures, so both must have been set.
    def memory
      physical_memory + virtual_memory
    end
  end

  #Represents a job record file
  class JobLog
    #The name of the accounting file opened
    attr_reader :filename

    #Creates a JobLog for the TORQUE record file with the given name
    #
    #The file is opened and immediately closed so that a missing or unreadable file is
    #reported here, without keeping a file descriptor open for the object's lifetime.
    def initialize(filename)
      @filename = filename
      File.open(filename) { }
    end

    ##
    #Raised when a line in the file is invalid
    class InvalidLineError < RuntimeError
      def initialize(filename, line_num)
        super("Line #{line_num} of file '#{filename}' is invalid.")
      end
    end

    ##
    #Yields each completed job to the given block
    #
    #Lines have the form <tt>timestamp;event type;job id;key=value key=value ...</tt>.
    #Blank lines are skipped, and only records whose event type is <tt>E</tt> produce a job.
    #Raises InvalidLineError for any non-blank line that lacks the expected separators.
    #
    #The file is read from the beginning on every call and is closed again when the
    #call ends, even if the block breaks out early or an error is raised.
    def each_job
      File.open(@filename) do |file|
        line_num = 0
        file.each_line do |line|
          line_num += 1
          line = line.strip
          next if line.empty?

          #Skip the timestamp.
          timestamp_end = line.index(';')
          raise invalid_line_error(line_num) unless timestamp_end

          #Find the event type, which must be a single character followed by a semicolon.
          event_type = line[timestamp_end + 1, 1]
          event_end = line.index(';', timestamp_end + 1)
          raise invalid_line_error(line_num) unless event_end == timestamp_end + 2
          next unless event_type == 'E'

          #Skip the job ID to find the fields.
          id_end = line.index(';', event_end + 1)
          raise invalid_line_error(line_num) unless id_end

          yield parse_job(line[id_end + 1 .. -1])
        end
      end
    end

    ##
    #Builds a Job from the space-separated <tt>key=value</tt> fields of a record
    #
    #Unrecognized keys are ignored. The command name is always set to an empty string.
    #Numbers are parsed as base 10 so that zero-padded values are not treated as octal.
    def parse_job(field_text)
      job = Job.new()

      #To consider: make sure all fields are present?
      field_text.split(' ').each do |field|
        #Split on the first '=' only so that a value containing '=' stays intact.
        key, value = field.split('=', 2)
        case key
        when "user"
          job.user_name = value
        when "group"
          job.group_name = value
        when "start"
          job.start_time = Time.at(Integer(value, 10))
        when "resources_used.walltime"
          job.wall_time = parse_duration(value)
        when "resources_used.cput"
          job.cpu_time = parse_duration(value)
        when "resources_used.mem"
          #Drop the two-character unit suffix (e.g. "kb").
          job.physical_memory = Integer(value[0 ... -2], 10)
        when "resources_used.vmem"
          job.virtual_memory = Integer(value[0 ... -2], 10)
        when "Exit_status"
          job.exit_code = Integer(value, 10)
        end
      end
      job.command_name = ""

      job
    end
    protected :parse_job

    ##
    #Creates an InvalidLineError associated with this object's file
    def invalid_line_error(line_num)
      InvalidLineError.new(@filename, line_num)
    end
    protected :invalid_line_error

    ##
    #Parses a duration in HH:MM:SS format, returning seconds
    #
    #Each component is parsed as base 10; without an explicit base, Integer() treats a
    #leading zero as an octal prefix and rejects components such as "08" and "09".
    #--
    #To consider: make class method?
    #++
    def parse_duration(str)
      hours, minutes, seconds = *str.split(':').map!{ |s| Integer(s, 10) }
      return hours * 3600 + minutes * 60 + seconds
    end
    protected :parse_duration

    ##
    #Converts a date to the name of the file holding entries for that date
    #
    #The result is <tt>server_priv/accounting/YYYYMMDD</tt> under Torque.torque_root.
    def self.filename_for_date(date)
      File.join(Torque.torque_root, 'server_priv', 'accounting', date.strftime("%Y%m%d"))
    end
  end

  class << self
    #The TORQUE root directory (usually the value of the environment variable TORQUEROOT)
    attr_accessor :torque_root
  end
  #To consider: make class variable? Constant?
  #Falls back to /var/spool/torque when TORQUEROOT is not set.
  @torque_root = ENV['TORQUEROOT'] || '/var/spool/torque'
end
161
+
162
#Reopens Torque::Job to mix in Bookie::ModelHelpers, which adds <tt>to_model</tt>
module Torque
  class Job
    include Bookie::ModelHelpers
  end
end