naf 2.1.10 → 2.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/RELEASE_NOTES.rdoc +12 -0
- data/app/assets/images/diagram_affinities.png +0 -0
- data/app/assets/images/diagram_runners.png +0 -0
- data/app/assets/images/diagram_running_system.png +0 -0
- data/app/controllers/naf/log_parsers_controller.rb +11 -3
- data/app/controllers/naf/log_viewer_controller.rb +2 -2
- data/app/models/logical/naf/construction_zone/boss.rb +34 -8
- data/app/models/logical/naf/log_parser/job.rb +2 -0
- data/app/models/naf/affinity.rb +10 -1
- data/app/models/naf/historical_job.rb +8 -0
- data/app/models/naf/running_job.rb +9 -0
- data/app/models/process/naf/machine_manager.rb +9 -3
- data/app/models/process/naf/runner.rb +70 -48
- data/bin/naf +18 -4
- data/lib/naf/version.rb +1 -1
- data/naf.gemspec +1 -1
- data/spec/factories/naf.rb +3 -3
- data/spec/models/logical/naf/construction_zone/{boss_rspec.rb → boss_spec.rb} +45 -3
- data/spec/models/naf/affinity_spec.rb +42 -3
- metadata +7 -4
data/RELEASE_NOTES.rdoc
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
= Release Notes
|
2
2
|
|
3
|
+
=== Version 2.1.11
|
4
|
+
Bug fixes:
|
5
|
+
* Runner behaves correctly when it tries to delete a non-existing running job
|
6
|
+
* Adding application schedule prerequisites works correctly
|
7
|
+
* Log display outputs a custom message when record id is not present
|
8
|
+
|
9
|
+
Changes:
|
10
|
+
* Only show affinities associated with machines that are not deleted
|
11
|
+
* Runner will clean up other runners after they are dead
|
12
|
+
* Machine manager updates the machines row if the server address or server name matches
|
13
|
+
* Improvements on naf runner script
|
14
|
+
|
3
15
|
=== Version 2.1.10
|
4
16
|
Bug fixes:
|
5
17
|
* LogArchiver correctly removes files and directories
|
Binary file
|
Binary file
|
Binary file
|
@@ -3,10 +3,18 @@ module Naf
|
|
3
3
|
|
4
4
|
def logs
|
5
5
|
if naf_cookie_valid?
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
if params['record_id'].present?
|
7
|
+
response = params['logical_type'].constantize.new(params).logs
|
8
|
+
|
9
|
+
if response.present?
|
10
|
+
success = true
|
11
|
+
else
|
12
|
+
success = false
|
13
|
+
end
|
9
14
|
else
|
15
|
+
response = {
|
16
|
+
logs: ' <span>Record id is not present</br></span>'
|
17
|
+
}
|
10
18
|
success = false
|
11
19
|
end
|
12
20
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Naf
|
2
2
|
class LogViewerController < Naf::ApplicationController
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
def index
|
5
|
+
if params['record_type'] == 'job'
|
6
6
|
@job = ::Naf::HistoricalJob.find_by_id(params['record_id'].to_i)
|
7
7
|
@status = ::Logical::Naf::Job.new(@job).status
|
8
8
|
@partial = 'job_logs'
|
@@ -18,17 +18,43 @@ module Logical::Naf::ConstructionZone
|
|
18
18
|
work_order = ApplicationWorkOrder.new(application,
|
19
19
|
application_run_group_restriction,
|
20
20
|
application_run_group_name,
|
21
|
-
application_run_group_limit
|
22
|
-
priority
|
23
|
-
affinities
|
24
|
-
prerequisites
|
25
|
-
enqueue_backlogs
|
21
|
+
application_run_group_limit,
|
22
|
+
priority,
|
23
|
+
affinities,
|
24
|
+
prerequisites,
|
25
|
+
enqueue_backlogs)
|
26
26
|
@foreman.enqueue(work_order)
|
27
27
|
end
|
28
28
|
|
29
|
-
def enqueue_application_schedule(application_schedule)
|
30
|
-
|
31
|
-
|
29
|
+
def enqueue_application_schedule(application_schedule, schedules_queued_already = [])
|
30
|
+
prerequisite_jobs = []
|
31
|
+
|
32
|
+
# Check if schedule has been queued
|
33
|
+
if schedules_queued_already.include? application_schedule.id
|
34
|
+
raise ::Naf::HistoricalJob::JobPrerequisiteLoop.new(application_schedule)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Keep track of queued schedules
|
38
|
+
schedules_queued_already << application_schedule.id
|
39
|
+
# Queue application schedule prerequisites
|
40
|
+
application_schedule.prerequisites.each do |application_schedule_prerequisite|
|
41
|
+
job = enqueue_application_schedule(application_schedule_prerequisite, schedules_queued_already)
|
42
|
+
if job.present?
|
43
|
+
prerequisite_jobs << job
|
44
|
+
else
|
45
|
+
return
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Queue the application
|
50
|
+
return enqueue_application(application_schedule.application,
|
51
|
+
application_schedule.application_run_group_restriction,
|
52
|
+
application_schedule.application_run_group_name,
|
53
|
+
application_schedule.application_run_group_limit,
|
54
|
+
application_schedule.priority,
|
55
|
+
application_schedule.affinities,
|
56
|
+
prerequisite_jobs,
|
57
|
+
application_schedule.enqueue_backlogs)
|
32
58
|
end
|
33
59
|
|
34
60
|
def enqueue_rails_command(command,
|
data/app/models/naf/affinity.rb
CHANGED
@@ -51,8 +51,17 @@ module Naf
|
|
51
51
|
where(selectable: true)
|
52
52
|
end
|
53
53
|
|
54
|
+
def self.deleted_machine_affinities
|
55
|
+
joins(:affinity_classification).
|
56
|
+
joins("INNER JOIN #{Naf.schema_name}.machines AS m
|
57
|
+
ON CAST (m.id AS TEXT) = #{Naf.schema_name}.affinities.affinity_name").
|
58
|
+
where("#{Naf.schema_name}.affinity_classifications.affinity_classification_name = 'machine' AND
|
59
|
+
m.deleted IS TRUE")
|
60
|
+
end
|
61
|
+
|
54
62
|
def self.names_list
|
55
|
-
|
63
|
+
# Don't include affinity that is associated with a deleted machine
|
64
|
+
(selectable - deleted_machine_affinities).map do |a|
|
56
65
|
classification = a.affinity_classification
|
57
66
|
if classification.affinity_classification_name == 'machine'
|
58
67
|
if a.affinity_short_name.present?
|
@@ -1,5 +1,7 @@
|
|
1
1
|
module Naf
|
2
2
|
class RunningJob < NafBase
|
3
|
+
include PgAdvisoryLocker
|
4
|
+
|
3
5
|
# Protect from mass-assignment issue
|
4
6
|
attr_accessible :application_id,
|
5
7
|
:application_schedule_id,
|
@@ -117,6 +119,13 @@ module Naf
|
|
117
119
|
self.save!
|
118
120
|
end
|
119
121
|
|
122
|
+
def lock_for_runner_use(&block)
|
123
|
+
advisory_lock(&block)
|
124
|
+
end
|
125
|
+
|
126
|
+
def unlock_for_runner_use
|
127
|
+
advisory_unlock
|
128
|
+
end
|
120
129
|
|
121
130
|
end
|
122
131
|
end
|
@@ -28,9 +28,15 @@ module Process::Naf
|
|
28
28
|
machine = ::Naf::Machine.find_by_server_address(@server_address)
|
29
29
|
if machine.blank?
|
30
30
|
server_name = (`hostname`).strip
|
31
|
-
machine = ::Naf::Machine.
|
32
|
-
|
33
|
-
|
31
|
+
machine = ::Naf::Machine.find_by_server_name(server_name)
|
32
|
+
if machine.blank?
|
33
|
+
machine = ::Naf::Machine.create(server_address: @server_address,
|
34
|
+
server_name: server_name)
|
35
|
+
add_default_affinities(machine)
|
36
|
+
else
|
37
|
+
machine.server_address = @server_address
|
38
|
+
machine.save!
|
39
|
+
end
|
34
40
|
end
|
35
41
|
|
36
42
|
machine.server_note = @server_note unless @server_note.nil?
|
@@ -4,7 +4,8 @@ module Process::Naf
|
|
4
4
|
class Runner < ::Af::Application
|
5
5
|
|
6
6
|
attr_accessor :machine,
|
7
|
-
:current_invocation
|
7
|
+
:current_invocation,
|
8
|
+
:last_cleaned_up_processes
|
8
9
|
|
9
10
|
#----------------
|
10
11
|
# *** Options ***
|
@@ -64,14 +65,13 @@ module Process::Naf
|
|
64
65
|
@machine = ::Naf::Machine.find_by_server_address(@server_address)
|
65
66
|
|
66
67
|
unless machine.present?
|
67
|
-
logger.fatal "This machine is not configued correctly (ipaddress: #{@server_address})."
|
68
|
-
logger.fatal "Please update #{::Naf::Machine.table_name} with an entry for this machine."
|
68
|
+
logger.fatal escape_html("This machine is not configued correctly (ipaddress: #{@server_address}).")
|
69
|
+
logger.fatal escape_html("Please update #{::Naf::Machine.table_name} with an entry for this machine.")
|
69
70
|
logger.fatal "Exiting..."
|
70
71
|
exit 1
|
71
72
|
end
|
72
73
|
|
73
|
-
machine.lock_for_runner_use
|
74
|
-
begin
|
74
|
+
machine.lock_for_runner_use do
|
75
75
|
cleanup_old_processes
|
76
76
|
remove_invalid_running_jobs
|
77
77
|
wind_down_runners
|
@@ -85,8 +85,6 @@ module Process::Naf
|
|
85
85
|
create!({ machine_runner_id: machine_runner.id,
|
86
86
|
pid: Process.pid,
|
87
87
|
uuid: @invocation_uuid }.merge!(retrieve_invocation_information))
|
88
|
-
ensure
|
89
|
-
machine.unlock_for_runner_use
|
90
88
|
end
|
91
89
|
|
92
90
|
begin
|
@@ -99,14 +97,17 @@ module Process::Naf
|
|
99
97
|
end
|
100
98
|
|
101
99
|
def remove_invalid_running_jobs
|
100
|
+
logger.debug "looking for invalid running jobs"
|
102
101
|
::Naf::RunningJob.
|
103
102
|
joins("INNER JOIN #{Naf.schema_name}.historical_jobs AS hj ON hj.id = #{Naf.schema_name}.running_jobs.id").
|
104
103
|
where('finished_at IS NOT NULL AND hj.started_on_machine_id = ?', @machine.id).readonly(false).each do |job|
|
104
|
+
logger.debug escape_html("removing invalid job #{job.inspect}")
|
105
105
|
job.delete
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
109
|
def check_gc_configurations
|
110
|
+
logger.debug "checking garbage collection configurations"
|
110
111
|
unless @disable_gc_modifications
|
111
112
|
# These configuration changes will help forked processes, not the runner
|
112
113
|
ENV['RUBY_HEAP_MIN_SLOTS'] = '500000'
|
@@ -116,9 +117,11 @@ module Process::Naf
|
|
116
117
|
end
|
117
118
|
end
|
118
119
|
|
119
|
-
def cleanup_old_processes
|
120
|
-
|
121
|
-
|
120
|
+
def cleanup_old_processes(created_at_interval = 1.month, marked_dead_interval = 24.hours)
|
121
|
+
@last_cleaned_up_processes = Time.zone.now
|
122
|
+
logger.debug "cleaning up old processes"
|
123
|
+
::Naf::MachineRunner.where("created_at >= ?", Time.zone.now - created_at_interval).each do |runner|
|
124
|
+
runner.machine_runner_invocations.recently_marked_dead(marked_dead_interval).each do |invocation|
|
122
125
|
terminate_old_processes(invocation)
|
123
126
|
end
|
124
127
|
end
|
@@ -130,11 +133,11 @@ module Process::Naf
|
|
130
133
|
if invocation.dead_at.blank?
|
131
134
|
begin
|
132
135
|
retval = Process.kill(0, invocation.pid)
|
133
|
-
logger.detail "#{retval} = kill(0, #{invocation.pid}) -- process alive, marking runner invocation as winding down"
|
136
|
+
logger.detail escape_html("#{retval} = kill(0, #{invocation.pid}) -- process alive, marking runner invocation as winding down")
|
134
137
|
invocation.wind_down_at = Time.zone.now
|
135
138
|
invocation.save!
|
136
139
|
rescue Errno::ESRCH
|
137
|
-
logger.detail "ESRCH = kill(0, #{invocation.pid}) -- marking runner invocation as not running"
|
140
|
+
logger.detail escape_html("ESRCH = kill(0, #{invocation.pid}) -- marking runner invocation as not running")
|
138
141
|
invocation.dead_at = Time.zone.now
|
139
142
|
invocation.save!
|
140
143
|
terminate_old_processes(invocation)
|
@@ -211,6 +214,7 @@ module Process::Naf
|
|
211
214
|
return false
|
212
215
|
end
|
213
216
|
|
217
|
+
logger.debug "marking machine alive"
|
214
218
|
machine.mark_alive
|
215
219
|
|
216
220
|
check_log_level
|
@@ -227,6 +231,7 @@ module Process::Naf
|
|
227
231
|
end
|
228
232
|
|
229
233
|
cleanup_dead_children
|
234
|
+
cleanup_old_processes(1.week, 75.minutes) if (Time.zone.now - @last_cleaned_up_processes) > 1.hour
|
230
235
|
|
231
236
|
return true
|
232
237
|
end
|
@@ -298,7 +303,7 @@ module Process::Naf
|
|
298
303
|
logger.warn e
|
299
304
|
pid = @children.first.try(:first)
|
300
305
|
status = nil
|
301
|
-
logger.warn "pulling first child off list to clean it up: pid=#{pid}"
|
306
|
+
logger.warn escape_html("pulling first child off list to clean it up: pid=#{pid}")
|
302
307
|
end
|
303
308
|
|
304
309
|
if pid
|
@@ -327,7 +332,7 @@ module Process::Naf
|
|
327
332
|
def check_dead_children_not_exited_properly
|
328
333
|
dead_children = []
|
329
334
|
@children.each do |pid, child|
|
330
|
-
unless is_job_process_alive?(child
|
335
|
+
unless is_job_process_alive?(child)
|
331
336
|
dead_children << child
|
332
337
|
end
|
333
338
|
end
|
@@ -346,27 +351,25 @@ module Process::Naf
|
|
346
351
|
child_job.remove_tags([::Naf::HistoricalJob::SYSTEM_TAGS[:work]])
|
347
352
|
|
348
353
|
if status.nil? || status.exited? || status.signaled?
|
349
|
-
logger.info { escape_html("cleaning up dead child: #{child_job.
|
354
|
+
logger.info { escape_html("cleaning up dead child: #{child_job.inspect}") }
|
350
355
|
finish_job(child_job,
|
351
356
|
{ exit_status: (status && status.exitstatus), termination_signal: (status && status.termsig) })
|
352
357
|
else
|
353
358
|
# this can happen if the child is sigstopped
|
354
|
-
logger.warn escape_html("child waited for did not exit: #{child_job}, status: #{status.inspect}")
|
359
|
+
logger.warn escape_html("child waited for did not exit: #{child_job.inspect}, status: #{status.inspect}")
|
355
360
|
end
|
356
361
|
else
|
357
362
|
# XXX ERROR no child for returned pid -- this can't happen
|
358
|
-
logger.warn "child pid: #{pid}, status: #{status.inspect}, not managed by this runner"
|
363
|
+
logger.warn escape_html("child pid: #{pid}, status: #{status.inspect}, not managed by this runner")
|
359
364
|
end
|
360
365
|
end
|
361
366
|
|
362
367
|
def start_new_jobs
|
363
|
-
|
364
|
-
logger.detail "starting new jobs, num children: #{@children.length}/#{machine.thread_pool_size}"
|
365
|
-
# XXX while @children.length < machine.thread_pool_size && memory_available_to_spawn? && current_invocation.wind_down_at.blank?
|
368
|
+
logger.detail escape_html("starting new jobs, num children: #{@children.length}/#{machine.thread_pool_size}")
|
366
369
|
while ::Naf::RunningJob.where(started_on_machine_id: machine.id).count < machine.thread_pool_size &&
|
367
370
|
memory_available_to_spawn? && current_invocation.wind_down_at.blank?
|
368
371
|
|
369
|
-
logger.debug_gross "fetching jobs because: children: #{@children.length} < #{machine.thread_pool_size} (poolsize)"
|
372
|
+
logger.debug_gross escape_html("fetching jobs because: children: #{@children.length} < #{machine.thread_pool_size} (poolsize)")
|
370
373
|
begin
|
371
374
|
running_job = @job_fetcher.fetch_next_job
|
372
375
|
|
@@ -384,12 +387,12 @@ module Process::Naf
|
|
384
387
|
running_job.historical_job.pid = pid
|
385
388
|
running_job.historical_job.failed_to_start = false
|
386
389
|
running_job.historical_job.machine_runner_invocation_id = current_invocation.id
|
387
|
-
logger.info escape_html("job started : #{running_job}")
|
388
390
|
running_job.save!
|
389
391
|
running_job.historical_job.save!
|
392
|
+
logger.info escape_html("job started : #{running_job.inspect}")
|
390
393
|
else
|
391
394
|
# should never get here (well, hopefully)
|
392
|
-
logger.error escape_html("#{machine}: failed to execute #{running_job}")
|
395
|
+
logger.error escape_html("#{machine}: failed to execute #{running_job.inspect}")
|
393
396
|
|
394
397
|
finish_job(running_job, { failed_to_start: true })
|
395
398
|
end
|
@@ -404,7 +407,7 @@ module Process::Naf
|
|
404
407
|
logger.debug_gross "done starting jobs"
|
405
408
|
end
|
406
409
|
|
407
|
-
#
|
410
|
+
# update_all doesn't support "from_partition" so we have this helper
|
408
411
|
def update_historical_job(updates, historical_job_id)
|
409
412
|
updates[:updated_at] = Time.zone.now
|
410
413
|
update_columns = updates.map{ |k,v| "#{k} = ?" }.join(", ")
|
@@ -420,14 +423,25 @@ module Process::Naf
|
|
420
423
|
end
|
421
424
|
|
422
425
|
def finish_job(running_job, updates = {})
|
423
|
-
if
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
426
|
+
# Check to see if running job still exists
|
427
|
+
job = ::Naf::RunningJob.find_by_id(running_job.id)
|
428
|
+
if job.present?
|
429
|
+
job.lock_for_runner_use do
|
430
|
+
::Naf::HistoricalJob.transaction do
|
431
|
+
update_historical_job(updates.merge({ finished_at: Time.zone.now }), job.id)
|
432
|
+
job.delete
|
433
|
+
end
|
434
|
+
end
|
435
|
+
else
|
436
|
+
job = ::Naf::HistoricalJob.find_by_id(running_job.id)
|
437
|
+
# This does not seem to be needed, but just for extra measure
|
438
|
+
if job.present?
|
439
|
+
job.lock_for_runner_use do
|
440
|
+
::Naf::HistoricalJob.transaction do
|
441
|
+
update_historical_job(updates.merge({ finished_at: Time.zone.now }), job.id)
|
442
|
+
end
|
443
|
+
end
|
444
|
+
end
|
431
445
|
end
|
432
446
|
end
|
433
447
|
|
@@ -439,7 +453,10 @@ module Process::Naf
|
|
439
453
|
@children.clone.each do |pid, child|
|
440
454
|
send_signal_and_maybe_clean_up(child, "TERM")
|
441
455
|
end
|
456
|
+
|
457
|
+
# Wait 2 seconds
|
442
458
|
sleep(2)
|
459
|
+
|
443
460
|
@children.clone.each do |pid, child|
|
444
461
|
send_signal_and_maybe_clean_up(child, "KILL")
|
445
462
|
|
@@ -456,9 +473,10 @@ module Process::Naf
|
|
456
473
|
logger.detail "no jobs to remove"
|
457
474
|
return
|
458
475
|
end
|
476
|
+
|
459
477
|
logger.info "number of old jobs to sift through: #{jobs.length}"
|
460
478
|
jobs.each do |job|
|
461
|
-
logger.detail escape_html("job still around: #{job}")
|
479
|
+
logger.detail escape_html("job still around: #{job.inspect}")
|
462
480
|
if job.request_to_terminate == false
|
463
481
|
logger.warn "politely asking process: #{job.pid} to terminate itself"
|
464
482
|
job.request_to_terminate = true
|
@@ -482,7 +500,7 @@ module Process::Naf
|
|
482
500
|
return
|
483
501
|
end
|
484
502
|
jobs.each do |job|
|
485
|
-
logger.warn escape_html("sending SIG_TERM to process: #{job}")
|
503
|
+
logger.warn escape_html("sending SIG_TERM to process: #{job.inspect}")
|
486
504
|
send_signal_and_maybe_clean_up(job, "TERM")
|
487
505
|
end
|
488
506
|
|
@@ -496,7 +514,7 @@ module Process::Naf
|
|
496
514
|
|
497
515
|
# kill with fire
|
498
516
|
assigned_jobs(record).each do |job|
|
499
|
-
logger.alarm escape_html("sending SIG_KILL to process: #{job}")
|
517
|
+
logger.alarm escape_html("sending SIG_KILL to process: #{job.inspect}")
|
500
518
|
send_signal_and_maybe_clean_up(job, "KILL")
|
501
519
|
|
502
520
|
# job force job down
|
@@ -522,6 +540,7 @@ module Process::Naf
|
|
522
540
|
|
523
541
|
return false
|
524
542
|
end
|
543
|
+
|
525
544
|
return true
|
526
545
|
end
|
527
546
|
|
@@ -545,9 +564,22 @@ module Process::Naf
|
|
545
564
|
Facter.clear
|
546
565
|
memory_size = Facter.memorysize_mb.to_f
|
547
566
|
memory_free = Facter.memoryfree_mb.to_f
|
567
|
+
memory_free_percentage = ((memory_free + sreclaimable_memory) / memory_size) * 100.0
|
568
|
+
|
569
|
+
if (memory_free_percentage >= @minimum_memory_free)
|
570
|
+
logger.detail "memory available: #{memory_free_percentage}% (free) >= " +
|
571
|
+
"#{@minimum_memory_free}% (min percent)"
|
572
|
+
return true
|
573
|
+
end
|
574
|
+
logger.alarm "#{Facter.hostname}.#{Facter.domain}: not enough memory to spawn: " +
|
575
|
+
"#{memory_free_percentage}% (free) < #{@minimum_memory_free}% (min percent)"
|
548
576
|
|
549
|
-
|
550
|
-
|
577
|
+
return false
|
578
|
+
end
|
579
|
+
|
580
|
+
# Linux breaks out kernel cache-use memory into an SReclaimable stat
|
581
|
+
# in /proc/meminfo which should be counted as free, but facter does not.
|
582
|
+
def sreclaimable_memory
|
551
583
|
sreclaimable = 0.0
|
552
584
|
begin
|
553
585
|
File.readlines('/proc/meminfo').each do |l|
|
@@ -561,17 +593,7 @@ module Process::Naf
|
|
561
593
|
rescue
|
562
594
|
end
|
563
595
|
|
564
|
-
|
565
|
-
|
566
|
-
if (memory_free_percentage >= @minimum_memory_free)
|
567
|
-
logger.detail "memory available: #{memory_free_percentage}% (free) >= " +
|
568
|
-
"#{@minimum_memory_free}% (min percent)"
|
569
|
-
return true
|
570
|
-
end
|
571
|
-
logger.alarm "#{Facter.hostname}.#{Facter.domain}: not enough memory to spawn: " +
|
572
|
-
"#{memory_free_percentage}% (free) < #{@minimum_memory_free}% (min percent)"
|
573
|
-
|
574
|
-
return false
|
596
|
+
sreclaimable
|
575
597
|
end
|
576
598
|
|
577
599
|
def escape_html(str)
|
data/bin/naf
CHANGED
@@ -5,22 +5,36 @@ action, option = ARGV
|
|
5
5
|
if action == 'runner'
|
6
6
|
if option == 'up'
|
7
7
|
puts "Bringing up the runner(s)..."
|
8
|
-
`screen -d -m bash -c 'source /root/.bash_profile && cd /root/current && a=\`uuidgen\` &&
|
8
|
+
`screen -d -m bash -c 'source /root/.bash_profile && cd /root/current && a=\`uuidgen\` &&
|
9
|
+
script/rails runner ::Process::Naf::Runner.run --invocation-uuid $a 2>&1 |
|
9
10
|
script/rails runner ::Process::Naf::Logger::RunnerLog.run --invocation-uuid $a'`
|
10
11
|
|
11
12
|
elsif option == 'status'
|
12
13
|
num_runners = Integer(`ps -ef | grep Process::Naf::Runner.run | grep -v grep | grep -v uuidgen | wc -l`.strip)
|
13
14
|
hostname = `hostname`.strip
|
14
15
|
if num_runners == 0
|
15
|
-
puts "down: #{hostname}"
|
16
|
+
puts "Runner down on host: #{hostname}"
|
16
17
|
elsif num_runners == 1
|
17
|
-
puts "up: #{hostname}"
|
18
|
+
puts "Runner up on host: #{hostname}"
|
18
19
|
else
|
19
|
-
puts "up
|
20
|
+
puts "1 runner up, #{num_runners - 1} runner(s) winding down on host: #{hostname}"
|
20
21
|
end
|
21
22
|
|
22
23
|
elsif option == 'down'
|
23
24
|
puts 'Bringing down runner(s)...'
|
24
25
|
`kill $(ps -ef | grep Process::Naf::Runner.run | grep -v grep | grep -v uuidgen | awk '{ print $2 }') | cat`
|
26
|
+
|
27
|
+
elsif option == '--?' || option == '--help'
|
28
|
+
puts "DESCRIPTION\n\tThe following options are available:\n\n" +
|
29
|
+
"\tup\t->\tBrings up new Runner and RunnerLog processes on the host.\n\n" +
|
30
|
+
"\tstatus\t->\tLists the status of the runner based on unix process status. The runner can be down, up, or up/winding down.\n\n" +
|
31
|
+
"\tdown\t->\tTakes down the runner by sending a kill signal to the processes specified by the pid operand."
|
32
|
+
else
|
33
|
+
puts "Sorry, option \'#{option}\' is not available. Available options: up, status, down, --?, --help"
|
25
34
|
end
|
35
|
+
elsif action == '--?' || action == '--help'
|
36
|
+
puts "DESCRIPTION\n\tThe following actions are available:\n\n" +
|
37
|
+
"\trunner\t->\tControls the Naf runner."
|
38
|
+
else
|
39
|
+
puts "Sorry, action \'#{action}\' is not available. Available actions: runner, --?, --help"
|
26
40
|
end
|
data/lib/naf/version.rb
CHANGED
data/naf.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.name = 'naf'
|
9
9
|
s.version = Naf::VERSION
|
10
10
|
s.license = 'New BSD License'
|
11
|
-
s.date = '2014-04-
|
11
|
+
s.date = '2014-04-30'
|
12
12
|
s.summary = 'Creates infrastructure for a customizable and robust Postgres-backed script scheduling/running'
|
13
13
|
s.description = 'A cloud based distributed cron, application framework and operations console. Naf works as a distributed script running ' +
|
14
14
|
'system that provides scheduling, logging, alarming, machine redundancy, and the ability to set constraint during script execution'
|
data/spec/factories/naf.rb
CHANGED
@@ -176,9 +176,9 @@ FactoryGirl.define do
|
|
176
176
|
sequence(:application_run_group_name) { |n| "Run Group #{n}" }
|
177
177
|
end
|
178
178
|
|
179
|
-
|
180
|
-
#######
|
181
|
-
|
179
|
+
#############################################
|
180
|
+
####### Run Interval Style ################
|
181
|
+
#############################################
|
182
182
|
|
183
183
|
factory :run_interval_style, class: ::Naf::RunIntervalStyle do
|
184
184
|
name 'at beginning of day'
|
@@ -34,19 +34,61 @@ module Logical::Naf::ConstructionZone
|
|
34
34
|
|
35
35
|
describe '#enqueue_application' do
|
36
36
|
let(:application) { FactoryGirl.create(:application) }
|
37
|
+
let(:prereq) { FactoryGirl.create(:job) }
|
37
38
|
let!(:job) {
|
38
39
|
boss.enqueue_application(application,
|
39
40
|
::Naf::ApplicationRunGroupRestriction.no_limit,
|
40
|
-
application.command
|
41
|
+
application.command,
|
42
|
+
5,
|
43
|
+
1,
|
44
|
+
[::Naf::Affinity.first],
|
45
|
+
[prereq],
|
46
|
+
true)
|
41
47
|
}
|
42
48
|
|
43
|
-
it_should_behave_like 'create one historical job',
|
49
|
+
it_should_behave_like 'create one historical job', 2
|
50
|
+
|
51
|
+
it 'assign run group restriction correctly' do
|
52
|
+
job.application_run_group_restriction_id.should == ::Naf::ApplicationRunGroupRestriction.no_limit.id
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'assign run group name correctly' do
|
56
|
+
job.application_run_group_name.should == application.command
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'assign run group limit correctly' do
|
60
|
+
job.application_run_group_limit.should == 5
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'assign priority correctly' do
|
64
|
+
job.priority.should == 1
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'assign affinities correctly' do
|
68
|
+
job.historical_job_affinity_tabs.map(&:affinity_id).should == [::Naf::Affinity.first.id]
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'assign prerequisites correctly' do
|
72
|
+
job.historical_job_prerequisites.map(&:prerequisite_historical_job_id).should == [prereq.id]
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'assign enqueue_backlogs correctly' do
|
76
|
+
job.application_run_group_name.should == application.command
|
77
|
+
end
|
44
78
|
end
|
45
79
|
|
46
80
|
describe '#enqueue_application_schedule' do
|
47
81
|
let!(:job) { boss.enqueue_application_schedule(FactoryGirl.create(:schedule)) }
|
48
82
|
|
49
83
|
it_should_behave_like 'create one historical job', 1
|
84
|
+
|
85
|
+
it 'create two historical jobs when schedule has a prerequisite' do
|
86
|
+
schedule = FactoryGirl.create(:schedule)
|
87
|
+
FactoryGirl.create(:schedule_prerequisite, application_schedule: schedule)
|
88
|
+
boss.enqueue_application_schedule(schedule)
|
89
|
+
|
90
|
+
::Naf::HistoricalJob.should have(3).records
|
91
|
+
end
|
50
92
|
end
|
51
93
|
|
52
94
|
describe '#enqueue_rails_command' do
|
@@ -82,7 +124,7 @@ module Logical::Naf::ConstructionZone
|
|
82
124
|
|
83
125
|
it 'create two historical jobs when a machine is present' do
|
84
126
|
machine = FactoryGirl.create(:machine)
|
85
|
-
classification = FactoryGirl.create(:
|
127
|
+
classification = FactoryGirl.create(:machine_affinity_classification)
|
86
128
|
FactoryGirl.create(:affinity, id: 5,
|
87
129
|
affinity_name: machine.id.to_s,
|
88
130
|
affinity_classification: classification)
|
@@ -5,6 +5,14 @@ module Naf
|
|
5
5
|
let!(:normal) { FactoryGirl.create(:normal_affinity) }
|
6
6
|
let(:canary) { FactoryGirl.create(:canary_affinity) }
|
7
7
|
let(:perennial) { FactoryGirl.create(:perennial_affinity) }
|
8
|
+
let!(:machine) { FactoryGirl.create(:machine) }
|
9
|
+
let!(:machine_classification) { FactoryGirl.create(:machine_affinity_classification) }
|
10
|
+
let!(:machine_affinity) {
|
11
|
+
FactoryGirl.create(:affinity, id: 5,
|
12
|
+
affinity_classification_id: machine_classification.id,
|
13
|
+
affinity_name: machine.id.to_s)
|
14
|
+
}
|
15
|
+
|
8
16
|
|
9
17
|
# Mass-assignment
|
10
18
|
[:affinity_classification_id,
|
@@ -94,9 +102,7 @@ module Naf
|
|
94
102
|
|
95
103
|
it 'return proper message when pair value (affinity_classification_id, affinity_name) already exists' do
|
96
104
|
normal.affinity_name = FactoryGirl.create(:machine).id.to_s
|
97
|
-
normal.affinity_classification
|
98
|
-
normal.save
|
99
|
-
normal.affinity_classification.save
|
105
|
+
normal.affinity_classification = machine_classification
|
100
106
|
|
101
107
|
normal.validate_affinity_name.should == 'An affinity with the pair value (affinity_classification_id, affinity_name) already exists!'
|
102
108
|
end
|
@@ -110,6 +116,7 @@ module Naf
|
|
110
116
|
before do
|
111
117
|
canary.update_attributes!(selectable: false)
|
112
118
|
perennial.update_attributes!(selectable: false)
|
119
|
+
machine_affinity.update_attributes!(selectable: false)
|
113
120
|
end
|
114
121
|
|
115
122
|
it "return only selectable affinities" do
|
@@ -117,5 +124,37 @@ module Naf
|
|
117
124
|
end
|
118
125
|
end
|
119
126
|
|
127
|
+
describe "#deleted_machine_affinities" do
|
128
|
+
it "return only affinities that are associated with deleted machines" do
|
129
|
+
machine.update_attributes!(deleted: true, enabled: false)
|
130
|
+
::Naf::Affinity.deleted_machine_affinities.should == [machine_affinity]
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
describe "#names_list" do
|
135
|
+
before do
|
136
|
+
canary.update_attributes!(selectable: false)
|
137
|
+
perennial.update_attributes!(selectable: false)
|
138
|
+
end
|
139
|
+
|
140
|
+
it "return affinities not related to machine classification correctly" do
|
141
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['0.0.0.1', 5]]
|
142
|
+
end
|
143
|
+
|
144
|
+
it "return affinities related to machine classification correctly when short name is present" do
|
145
|
+
machine_affinity.update_attributes!(affinity_short_name: 'machine_1')
|
146
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['machine_1', 5]]
|
147
|
+
end
|
148
|
+
|
149
|
+
it "return affinities related to machine classification correctly when affinity_name is used" do
|
150
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['0.0.0.1', 5]]
|
151
|
+
end
|
152
|
+
|
153
|
+
it "return affinities related to machine classification correctly when it is invalid" do
|
154
|
+
machine_affinity.update_attributes!(affinity_name: '100')
|
155
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['Bad affinity: machine, 100', 5]]
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
120
159
|
end
|
121
160
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: naf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.11
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-04-
|
13
|
+
date: 2014-04-30 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rails
|
@@ -194,6 +194,9 @@ files:
|
|
194
194
|
- app/assets/images/bg-grad.png
|
195
195
|
- app/assets/images/clock.png
|
196
196
|
- app/assets/images/control_play_blue.png
|
197
|
+
- app/assets/images/diagram_affinities.png
|
198
|
+
- app/assets/images/diagram_runners.png
|
199
|
+
- app/assets/images/diagram_running_system.png
|
197
200
|
- app/assets/images/down_arrow.gif
|
198
201
|
- app/assets/images/job.png
|
199
202
|
- app/assets/images/machine.png
|
@@ -484,7 +487,7 @@ files:
|
|
484
487
|
- spec/factories/naf.rb
|
485
488
|
- spec/helpers/naf/application_helper_spec.rb
|
486
489
|
- spec/models/logical/naf/application_spec.rb
|
487
|
-
- spec/models/logical/naf/construction_zone/
|
490
|
+
- spec/models/logical/naf/construction_zone/boss_spec.rb
|
488
491
|
- spec/models/logical/naf/construction_zone/foreman_spec.rb
|
489
492
|
- spec/models/logical/naf/construction_zone/proletariat_spec.rb
|
490
493
|
- spec/models/logical/naf/construction_zone/work_order_spec.rb
|
@@ -603,7 +606,7 @@ test_files:
|
|
603
606
|
- spec/factories/naf.rb
|
604
607
|
- spec/helpers/naf/application_helper_spec.rb
|
605
608
|
- spec/models/logical/naf/application_spec.rb
|
606
|
-
- spec/models/logical/naf/construction_zone/
|
609
|
+
- spec/models/logical/naf/construction_zone/boss_spec.rb
|
607
610
|
- spec/models/logical/naf/construction_zone/foreman_spec.rb
|
608
611
|
- spec/models/logical/naf/construction_zone/proletariat_spec.rb
|
609
612
|
- spec/models/logical/naf/construction_zone/work_order_spec.rb
|