naf 2.1.10 → 2.1.11
Sign up to get free protection for your applications and to get access to all the features.
- data/RELEASE_NOTES.rdoc +12 -0
- data/app/assets/images/diagram_affinities.png +0 -0
- data/app/assets/images/diagram_runners.png +0 -0
- data/app/assets/images/diagram_running_system.png +0 -0
- data/app/controllers/naf/log_parsers_controller.rb +11 -3
- data/app/controllers/naf/log_viewer_controller.rb +2 -2
- data/app/models/logical/naf/construction_zone/boss.rb +34 -8
- data/app/models/logical/naf/log_parser/job.rb +2 -0
- data/app/models/naf/affinity.rb +10 -1
- data/app/models/naf/historical_job.rb +8 -0
- data/app/models/naf/running_job.rb +9 -0
- data/app/models/process/naf/machine_manager.rb +9 -3
- data/app/models/process/naf/runner.rb +70 -48
- data/bin/naf +18 -4
- data/lib/naf/version.rb +1 -1
- data/naf.gemspec +1 -1
- data/spec/factories/naf.rb +3 -3
- data/spec/models/logical/naf/construction_zone/{boss_rspec.rb → boss_spec.rb} +45 -3
- data/spec/models/naf/affinity_spec.rb +42 -3
- metadata +7 -4
data/RELEASE_NOTES.rdoc
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
= Release Notes
|
2
2
|
|
3
|
+
=== Version 2.1.11
|
4
|
+
Bug fixes:
|
5
|
+
* Runner behaves correctly when it tries to delete a non-existing running job
|
6
|
+
* Adding application schedule prerequisites works correctly
|
7
|
+
* Log display outputs custom message when record id is not present
|
8
|
+
|
9
|
+
Changes:
|
10
|
+
* Only show affinities associated with machines that are not deleted
|
11
|
+
* Runner will clean up other runners after they are dead
|
12
|
+
* Machine manager updates machines row if server address or server name match
|
13
|
+
* Improvements on naf runner script
|
14
|
+
|
3
15
|
=== Version 2.1.10
|
4
16
|
Bug fixes:
|
5
17
|
* LogArchiver correctly removes files and directories
|
Binary file
|
Binary file
|
Binary file
|
@@ -3,10 +3,18 @@ module Naf
|
|
3
3
|
|
4
4
|
def logs
|
5
5
|
if naf_cookie_valid?
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
if params['record_id'].present?
|
7
|
+
response = params['logical_type'].constantize.new(params).logs
|
8
|
+
|
9
|
+
if response.present?
|
10
|
+
success = true
|
11
|
+
else
|
12
|
+
success = false
|
13
|
+
end
|
9
14
|
else
|
15
|
+
response = {
|
16
|
+
logs: ' <span>Record id is not present</br></span>'
|
17
|
+
}
|
10
18
|
success = false
|
11
19
|
end
|
12
20
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Naf
|
2
2
|
class LogViewerController < Naf::ApplicationController
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
def index
|
5
|
+
if params['record_type'] == 'job'
|
6
6
|
@job = ::Naf::HistoricalJob.find_by_id(params['record_id'].to_i)
|
7
7
|
@status = ::Logical::Naf::Job.new(@job).status
|
8
8
|
@partial = 'job_logs'
|
@@ -18,17 +18,43 @@ module Logical::Naf::ConstructionZone
|
|
18
18
|
work_order = ApplicationWorkOrder.new(application,
|
19
19
|
application_run_group_restriction,
|
20
20
|
application_run_group_name,
|
21
|
-
application_run_group_limit
|
22
|
-
priority
|
23
|
-
affinities
|
24
|
-
prerequisites
|
25
|
-
enqueue_backlogs
|
21
|
+
application_run_group_limit,
|
22
|
+
priority,
|
23
|
+
affinities,
|
24
|
+
prerequisites,
|
25
|
+
enqueue_backlogs)
|
26
26
|
@foreman.enqueue(work_order)
|
27
27
|
end
|
28
28
|
|
29
|
-
def enqueue_application_schedule(application_schedule)
|
30
|
-
|
31
|
-
|
29
|
+
def enqueue_application_schedule(application_schedule, schedules_queued_already = [])
|
30
|
+
prerequisite_jobs = []
|
31
|
+
|
32
|
+
# Check if schedule has been queued
|
33
|
+
if schedules_queued_already.include? application_schedule.id
|
34
|
+
raise ::Naf::HistoricalJob::JobPrerequisiteLoop.new(application_schedule)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Keep track of queued schedules
|
38
|
+
schedules_queued_already << application_schedule.id
|
39
|
+
# Queue application schedule prerequisites
|
40
|
+
application_schedule.prerequisites.each do |application_schedule_prerequisite|
|
41
|
+
job = enqueue_application_schedule(application_schedule_prerequisite, schedules_queued_already)
|
42
|
+
if job.present?
|
43
|
+
prerequisite_jobs << job
|
44
|
+
else
|
45
|
+
return
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Queue the application
|
50
|
+
return enqueue_application(application_schedule.application,
|
51
|
+
application_schedule.application_run_group_restriction,
|
52
|
+
application_schedule.application_run_group_name,
|
53
|
+
application_schedule.application_run_group_limit,
|
54
|
+
application_schedule.priority,
|
55
|
+
application_schedule.affinities,
|
56
|
+
prerequisite_jobs,
|
57
|
+
application_schedule.enqueue_backlogs)
|
32
58
|
end
|
33
59
|
|
34
60
|
def enqueue_rails_command(command,
|
data/app/models/naf/affinity.rb
CHANGED
@@ -51,8 +51,17 @@ module Naf
|
|
51
51
|
where(selectable: true)
|
52
52
|
end
|
53
53
|
|
54
|
+
def self.deleted_machine_affinities
|
55
|
+
joins(:affinity_classification).
|
56
|
+
joins("INNER JOIN #{Naf.schema_name}.machines AS m
|
57
|
+
ON CAST (m.id AS TEXT) = #{Naf.schema_name}.affinities.affinity_name").
|
58
|
+
where("#{Naf.schema_name}.affinity_classifications.affinity_classification_name = 'machine' AND
|
59
|
+
m.deleted IS TRUE")
|
60
|
+
end
|
61
|
+
|
54
62
|
def self.names_list
|
55
|
-
|
63
|
+
# Don't include affinity that is associated with a deleted machine
|
64
|
+
(selectable - deleted_machine_affinities).map do |a|
|
56
65
|
classification = a.affinity_classification
|
57
66
|
if classification.affinity_classification_name == 'machine'
|
58
67
|
if a.affinity_short_name.present?
|
@@ -1,5 +1,7 @@
|
|
1
1
|
module Naf
|
2
2
|
class RunningJob < NafBase
|
3
|
+
include PgAdvisoryLocker
|
4
|
+
|
3
5
|
# Protect from mass-assignment issue
|
4
6
|
attr_accessible :application_id,
|
5
7
|
:application_schedule_id,
|
@@ -117,6 +119,13 @@ module Naf
|
|
117
119
|
self.save!
|
118
120
|
end
|
119
121
|
|
122
|
+
def lock_for_runner_use(&block)
|
123
|
+
advisory_lock(&block)
|
124
|
+
end
|
125
|
+
|
126
|
+
def unlock_for_runner_use
|
127
|
+
advisory_unlock
|
128
|
+
end
|
120
129
|
|
121
130
|
end
|
122
131
|
end
|
@@ -28,9 +28,15 @@ module Process::Naf
|
|
28
28
|
machine = ::Naf::Machine.find_by_server_address(@server_address)
|
29
29
|
if machine.blank?
|
30
30
|
server_name = (`hostname`).strip
|
31
|
-
machine = ::Naf::Machine.
|
32
|
-
|
33
|
-
|
31
|
+
machine = ::Naf::Machine.find_by_server_name(server_name)
|
32
|
+
if machine.blank?
|
33
|
+
machine = ::Naf::Machine.create(server_address: @server_address,
|
34
|
+
server_name: server_name)
|
35
|
+
add_default_affinities(machine)
|
36
|
+
else
|
37
|
+
machine.server_address = @server_address
|
38
|
+
machine.save!
|
39
|
+
end
|
34
40
|
end
|
35
41
|
|
36
42
|
machine.server_note = @server_note unless @server_note.nil?
|
@@ -4,7 +4,8 @@ module Process::Naf
|
|
4
4
|
class Runner < ::Af::Application
|
5
5
|
|
6
6
|
attr_accessor :machine,
|
7
|
-
:current_invocation
|
7
|
+
:current_invocation,
|
8
|
+
:last_cleaned_up_processes
|
8
9
|
|
9
10
|
#----------------
|
10
11
|
# *** Options ***
|
@@ -64,14 +65,13 @@ module Process::Naf
|
|
64
65
|
@machine = ::Naf::Machine.find_by_server_address(@server_address)
|
65
66
|
|
66
67
|
unless machine.present?
|
67
|
-
logger.fatal "This machine is not configued correctly (ipaddress: #{@server_address})."
|
68
|
-
logger.fatal "Please update #{::Naf::Machine.table_name} with an entry for this machine."
|
68
|
+
logger.fatal escape_html("This machine is not configued correctly (ipaddress: #{@server_address}).")
|
69
|
+
logger.fatal escape_html("Please update #{::Naf::Machine.table_name} with an entry for this machine.")
|
69
70
|
logger.fatal "Exiting..."
|
70
71
|
exit 1
|
71
72
|
end
|
72
73
|
|
73
|
-
machine.lock_for_runner_use
|
74
|
-
begin
|
74
|
+
machine.lock_for_runner_use do
|
75
75
|
cleanup_old_processes
|
76
76
|
remove_invalid_running_jobs
|
77
77
|
wind_down_runners
|
@@ -85,8 +85,6 @@ module Process::Naf
|
|
85
85
|
create!({ machine_runner_id: machine_runner.id,
|
86
86
|
pid: Process.pid,
|
87
87
|
uuid: @invocation_uuid }.merge!(retrieve_invocation_information))
|
88
|
-
ensure
|
89
|
-
machine.unlock_for_runner_use
|
90
88
|
end
|
91
89
|
|
92
90
|
begin
|
@@ -99,14 +97,17 @@ module Process::Naf
|
|
99
97
|
end
|
100
98
|
|
101
99
|
def remove_invalid_running_jobs
|
100
|
+
logger.debug "looking for invalid running jobs"
|
102
101
|
::Naf::RunningJob.
|
103
102
|
joins("INNER JOIN #{Naf.schema_name}.historical_jobs AS hj ON hj.id = #{Naf.schema_name}.running_jobs.id").
|
104
103
|
where('finished_at IS NOT NULL AND hj.started_on_machine_id = ?', @machine.id).readonly(false).each do |job|
|
104
|
+
logger.debug escape_html("removing invalid job #{job.inspect}")
|
105
105
|
job.delete
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
109
|
def check_gc_configurations
|
110
|
+
logger.debug "checking garbage collection configurations"
|
110
111
|
unless @disable_gc_modifications
|
111
112
|
# These configuration changes will help forked processes, not the runner
|
112
113
|
ENV['RUBY_HEAP_MIN_SLOTS'] = '500000'
|
@@ -116,9 +117,11 @@ module Process::Naf
|
|
116
117
|
end
|
117
118
|
end
|
118
119
|
|
119
|
-
def cleanup_old_processes
|
120
|
-
|
121
|
-
|
120
|
+
def cleanup_old_processes(created_at_interval = 1.month, marked_dead_interval = 24.hours)
|
121
|
+
@last_cleaned_up_processes = Time.zone.now
|
122
|
+
logger.debug "cleaning up old processes"
|
123
|
+
::Naf::MachineRunner.where("created_at >= ?", Time.zone.now - created_at_interval).each do |runner|
|
124
|
+
runner.machine_runner_invocations.recently_marked_dead(marked_dead_interval).each do |invocation|
|
122
125
|
terminate_old_processes(invocation)
|
123
126
|
end
|
124
127
|
end
|
@@ -130,11 +133,11 @@ module Process::Naf
|
|
130
133
|
if invocation.dead_at.blank?
|
131
134
|
begin
|
132
135
|
retval = Process.kill(0, invocation.pid)
|
133
|
-
logger.detail "#{retval} = kill(0, #{invocation.pid}) -- process alive, marking runner invocation as winding down"
|
136
|
+
logger.detail escape_html("#{retval} = kill(0, #{invocation.pid}) -- process alive, marking runner invocation as winding down")
|
134
137
|
invocation.wind_down_at = Time.zone.now
|
135
138
|
invocation.save!
|
136
139
|
rescue Errno::ESRCH
|
137
|
-
logger.detail "ESRCH = kill(0, #{invocation.pid}) -- marking runner invocation as not running"
|
140
|
+
logger.detail escape_html("ESRCH = kill(0, #{invocation.pid}) -- marking runner invocation as not running")
|
138
141
|
invocation.dead_at = Time.zone.now
|
139
142
|
invocation.save!
|
140
143
|
terminate_old_processes(invocation)
|
@@ -211,6 +214,7 @@ module Process::Naf
|
|
211
214
|
return false
|
212
215
|
end
|
213
216
|
|
217
|
+
logger.debug "marking machine alive"
|
214
218
|
machine.mark_alive
|
215
219
|
|
216
220
|
check_log_level
|
@@ -227,6 +231,7 @@ module Process::Naf
|
|
227
231
|
end
|
228
232
|
|
229
233
|
cleanup_dead_children
|
234
|
+
cleanup_old_processes(1.week, 75.minutes) if (Time.zone.now - @last_cleaned_up_processes) > 1.hour
|
230
235
|
|
231
236
|
return true
|
232
237
|
end
|
@@ -298,7 +303,7 @@ module Process::Naf
|
|
298
303
|
logger.warn e
|
299
304
|
pid = @children.first.try(:first)
|
300
305
|
status = nil
|
301
|
-
logger.warn "pulling first child off list to clean it up: pid=#{pid}"
|
306
|
+
logger.warn escape_html("pulling first child off list to clean it up: pid=#{pid}")
|
302
307
|
end
|
303
308
|
|
304
309
|
if pid
|
@@ -327,7 +332,7 @@ module Process::Naf
|
|
327
332
|
def check_dead_children_not_exited_properly
|
328
333
|
dead_children = []
|
329
334
|
@children.each do |pid, child|
|
330
|
-
unless is_job_process_alive?(child
|
335
|
+
unless is_job_process_alive?(child)
|
331
336
|
dead_children << child
|
332
337
|
end
|
333
338
|
end
|
@@ -346,27 +351,25 @@ module Process::Naf
|
|
346
351
|
child_job.remove_tags([::Naf::HistoricalJob::SYSTEM_TAGS[:work]])
|
347
352
|
|
348
353
|
if status.nil? || status.exited? || status.signaled?
|
349
|
-
logger.info { escape_html("cleaning up dead child: #{child_job.
|
354
|
+
logger.info { escape_html("cleaning up dead child: #{child_job.inspect}") }
|
350
355
|
finish_job(child_job,
|
351
356
|
{ exit_status: (status && status.exitstatus), termination_signal: (status && status.termsig) })
|
352
357
|
else
|
353
358
|
# this can happen if the child is sigstopped
|
354
|
-
logger.warn escape_html("child waited for did not exit: #{child_job}, status: #{status.inspect}")
|
359
|
+
logger.warn escape_html("child waited for did not exit: #{child_job.inspect}, status: #{status.inspect}")
|
355
360
|
end
|
356
361
|
else
|
357
362
|
# XXX ERROR no child for returned pid -- this can't happen
|
358
|
-
logger.warn "child pid: #{pid}, status: #{status.inspect}, not managed by this runner"
|
363
|
+
logger.warn escape_html("child pid: #{pid}, status: #{status.inspect}, not managed by this runner")
|
359
364
|
end
|
360
365
|
end
|
361
366
|
|
362
367
|
def start_new_jobs
|
363
|
-
|
364
|
-
logger.detail "starting new jobs, num children: #{@children.length}/#{machine.thread_pool_size}"
|
365
|
-
# XXX while @children.length < machine.thread_pool_size && memory_available_to_spawn? && current_invocation.wind_down_at.blank?
|
368
|
+
logger.detail escape_html("starting new jobs, num children: #{@children.length}/#{machine.thread_pool_size}")
|
366
369
|
while ::Naf::RunningJob.where(started_on_machine_id: machine.id).count < machine.thread_pool_size &&
|
367
370
|
memory_available_to_spawn? && current_invocation.wind_down_at.blank?
|
368
371
|
|
369
|
-
logger.debug_gross "fetching jobs because: children: #{@children.length} < #{machine.thread_pool_size} (poolsize)"
|
372
|
+
logger.debug_gross escape_html("fetching jobs because: children: #{@children.length} < #{machine.thread_pool_size} (poolsize)")
|
370
373
|
begin
|
371
374
|
running_job = @job_fetcher.fetch_next_job
|
372
375
|
|
@@ -384,12 +387,12 @@ module Process::Naf
|
|
384
387
|
running_job.historical_job.pid = pid
|
385
388
|
running_job.historical_job.failed_to_start = false
|
386
389
|
running_job.historical_job.machine_runner_invocation_id = current_invocation.id
|
387
|
-
logger.info escape_html("job started : #{running_job}")
|
388
390
|
running_job.save!
|
389
391
|
running_job.historical_job.save!
|
392
|
+
logger.info escape_html("job started : #{running_job.inspect}")
|
390
393
|
else
|
391
394
|
# should never get here (well, hopefully)
|
392
|
-
logger.error escape_html("#{machine}: failed to execute #{running_job}")
|
395
|
+
logger.error escape_html("#{machine}: failed to execute #{running_job.inspect}")
|
393
396
|
|
394
397
|
finish_job(running_job, { failed_to_start: true })
|
395
398
|
end
|
@@ -404,7 +407,7 @@ module Process::Naf
|
|
404
407
|
logger.debug_gross "done starting jobs"
|
405
408
|
end
|
406
409
|
|
407
|
-
#
|
410
|
+
# update_all doesn't support "from_partition" so we have this helper
|
408
411
|
def update_historical_job(updates, historical_job_id)
|
409
412
|
updates[:updated_at] = Time.zone.now
|
410
413
|
update_columns = updates.map{ |k,v| "#{k} = ?" }.join(", ")
|
@@ -420,14 +423,25 @@ module Process::Naf
|
|
420
423
|
end
|
421
424
|
|
422
425
|
def finish_job(running_job, updates = {})
|
423
|
-
if
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
426
|
+
# Check to see if running job still exists
|
427
|
+
job = ::Naf::RunningJob.find_by_id(running_job.id)
|
428
|
+
if job.present?
|
429
|
+
job.lock_for_runner_use do
|
430
|
+
::Naf::HistoricalJob.transaction do
|
431
|
+
update_historical_job(updates.merge({ finished_at: Time.zone.now }), job.id)
|
432
|
+
job.delete
|
433
|
+
end
|
434
|
+
end
|
435
|
+
else
|
436
|
+
job = ::Naf::HistoricalJob.find_by_id(running_job.id)
|
437
|
+
# This does not seem to be needed, but just for extra measure
|
438
|
+
if job.present?
|
439
|
+
job.lock_for_runner_use do
|
440
|
+
::Naf::HistoricalJob.transaction do
|
441
|
+
update_historical_job(updates.merge({ finished_at: Time.zone.now }), job.id)
|
442
|
+
end
|
443
|
+
end
|
444
|
+
end
|
431
445
|
end
|
432
446
|
end
|
433
447
|
|
@@ -439,7 +453,10 @@ module Process::Naf
|
|
439
453
|
@children.clone.each do |pid, child|
|
440
454
|
send_signal_and_maybe_clean_up(child, "TERM")
|
441
455
|
end
|
456
|
+
|
457
|
+
# Wait 2 seconds
|
442
458
|
sleep(2)
|
459
|
+
|
443
460
|
@children.clone.each do |pid, child|
|
444
461
|
send_signal_and_maybe_clean_up(child, "KILL")
|
445
462
|
|
@@ -456,9 +473,10 @@ module Process::Naf
|
|
456
473
|
logger.detail "no jobs to remove"
|
457
474
|
return
|
458
475
|
end
|
476
|
+
|
459
477
|
logger.info "number of old jobs to sift through: #{jobs.length}"
|
460
478
|
jobs.each do |job|
|
461
|
-
logger.detail escape_html("job still around: #{job}")
|
479
|
+
logger.detail escape_html("job still around: #{job.inspect}")
|
462
480
|
if job.request_to_terminate == false
|
463
481
|
logger.warn "politely asking process: #{job.pid} to terminate itself"
|
464
482
|
job.request_to_terminate = true
|
@@ -482,7 +500,7 @@ module Process::Naf
|
|
482
500
|
return
|
483
501
|
end
|
484
502
|
jobs.each do |job|
|
485
|
-
logger.warn escape_html("sending SIG_TERM to process: #{job}")
|
503
|
+
logger.warn escape_html("sending SIG_TERM to process: #{job.inspect}")
|
486
504
|
send_signal_and_maybe_clean_up(job, "TERM")
|
487
505
|
end
|
488
506
|
|
@@ -496,7 +514,7 @@ module Process::Naf
|
|
496
514
|
|
497
515
|
# kill with fire
|
498
516
|
assigned_jobs(record).each do |job|
|
499
|
-
logger.alarm escape_html("sending SIG_KILL to process: #{job}")
|
517
|
+
logger.alarm escape_html("sending SIG_KILL to process: #{job.inspect}")
|
500
518
|
send_signal_and_maybe_clean_up(job, "KILL")
|
501
519
|
|
502
520
|
# job force job down
|
@@ -522,6 +540,7 @@ module Process::Naf
|
|
522
540
|
|
523
541
|
return false
|
524
542
|
end
|
543
|
+
|
525
544
|
return true
|
526
545
|
end
|
527
546
|
|
@@ -545,9 +564,22 @@ module Process::Naf
|
|
545
564
|
Facter.clear
|
546
565
|
memory_size = Facter.memorysize_mb.to_f
|
547
566
|
memory_free = Facter.memoryfree_mb.to_f
|
567
|
+
memory_free_percentage = ((memory_free + sreclaimable_memory) / memory_size) * 100.0
|
568
|
+
|
569
|
+
if (memory_free_percentage >= @minimum_memory_free)
|
570
|
+
logger.detail "memory available: #{memory_free_percentage}% (free) >= " +
|
571
|
+
"#{@minimum_memory_free}% (min percent)"
|
572
|
+
return true
|
573
|
+
end
|
574
|
+
logger.alarm "#{Facter.hostname}.#{Facter.domain}: not enough memory to spawn: " +
|
575
|
+
"#{memory_free_percentage}% (free) < #{@minimum_memory_free}% (min percent)"
|
548
576
|
|
549
|
-
|
550
|
-
|
577
|
+
return false
|
578
|
+
end
|
579
|
+
|
580
|
+
# Linux breaks out kernel cache-use memory into an SReclaimable stat
|
581
|
+
# in /proc/meminfo which should be counted as free, but facter does not.
|
582
|
+
def sreclaimable_memory
|
551
583
|
sreclaimable = 0.0
|
552
584
|
begin
|
553
585
|
File.readlines('/proc/meminfo').each do |l|
|
@@ -561,17 +593,7 @@ module Process::Naf
|
|
561
593
|
rescue
|
562
594
|
end
|
563
595
|
|
564
|
-
|
565
|
-
|
566
|
-
if (memory_free_percentage >= @minimum_memory_free)
|
567
|
-
logger.detail "memory available: #{memory_free_percentage}% (free) >= " +
|
568
|
-
"#{@minimum_memory_free}% (min percent)"
|
569
|
-
return true
|
570
|
-
end
|
571
|
-
logger.alarm "#{Facter.hostname}.#{Facter.domain}: not enough memory to spawn: " +
|
572
|
-
"#{memory_free_percentage}% (free) < #{@minimum_memory_free}% (min percent)"
|
573
|
-
|
574
|
-
return false
|
596
|
+
sreclaimable
|
575
597
|
end
|
576
598
|
|
577
599
|
def escape_html(str)
|
data/bin/naf
CHANGED
@@ -5,22 +5,36 @@ action, option = ARGV
|
|
5
5
|
if action == 'runner'
|
6
6
|
if option == 'up'
|
7
7
|
puts "Bringing up the runner(s)..."
|
8
|
-
`screen -d -m bash -c 'source /root/.bash_profile && cd /root/current && a=\`uuidgen\` &&
|
8
|
+
`screen -d -m bash -c 'source /root/.bash_profile && cd /root/current && a=\`uuidgen\` &&
|
9
|
+
script/rails runner ::Process::Naf::Runner.run --invocation-uuid $a 2>&1 |
|
9
10
|
script/rails runner ::Process::Naf::Logger::RunnerLog.run --invocation-uuid $a'`
|
10
11
|
|
11
12
|
elsif option == 'status'
|
12
13
|
num_runners = Integer(`ps -ef | grep Process::Naf::Runner.run | grep -v grep | grep -v uuidgen | wc -l`.strip)
|
13
14
|
hostname = `hostname`.strip
|
14
15
|
if num_runners == 0
|
15
|
-
puts "down: #{hostname}"
|
16
|
+
puts "Runner down on host: #{hostname}"
|
16
17
|
elsif num_runners == 1
|
17
|
-
puts "up: #{hostname}"
|
18
|
+
puts "Runner up on host: #{hostname}"
|
18
19
|
else
|
19
|
-
puts "up
|
20
|
+
puts "1 runner up, #{num_runners - 1} runner(s) winding down on host: #{hostname}"
|
20
21
|
end
|
21
22
|
|
22
23
|
elsif option == 'down'
|
23
24
|
puts 'Bringing down runner(s)...'
|
24
25
|
`kill $(ps -ef | grep Process::Naf::Runner.run | grep -v grep | grep -v uuidgen | awk '{ print $2 }') | cat`
|
26
|
+
|
27
|
+
elsif option == '--?' || option == '--help'
|
28
|
+
puts "DESCRIPTION\n\tThe following options are available:\n\n" +
|
29
|
+
"\tup\t->\tBrings up new Runner and RunnerLog processes on the host.\n\n" +
|
30
|
+
"\tstatus\t->\tLists the status of the runner based on unix process status. The runner can be down, up, or up/winding down.\n\n" +
|
31
|
+
"\tdown\t->\tTakes down the runner by sending a kill signal to the processes specified by the pid operand."
|
32
|
+
else
|
33
|
+
puts "Sorry, option \'#{option}\' is not available. Available options: up, status, down, --?, --help"
|
25
34
|
end
|
35
|
+
elsif action == '--?' || action == '--help'
|
36
|
+
puts "DESCRIPTION\n\tThe following actions are available:\n\n" +
|
37
|
+
"\trunner\t->\tControls the Naf runner."
|
38
|
+
else
|
39
|
+
puts "Sorry, action \'#{action}\' is not available. Available actions: runner, --?, --help"
|
26
40
|
end
|
data/lib/naf/version.rb
CHANGED
data/naf.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.name = 'naf'
|
9
9
|
s.version = Naf::VERSION
|
10
10
|
s.license = 'New BSD License'
|
11
|
-
s.date = '2014-04-
|
11
|
+
s.date = '2014-04-30'
|
12
12
|
s.summary = 'Creates infrastructure for a customizable and robust Postgres-backed script scheduling/running'
|
13
13
|
s.description = 'A cloud based distributed cron, application framework and operations console. Naf works as a distributed script running ' +
|
14
14
|
'system that provides scheduling, logging, alarming, machine redundancy, and the ability to set constraint during script execution'
|
data/spec/factories/naf.rb
CHANGED
@@ -176,9 +176,9 @@ FactoryGirl.define do
|
|
176
176
|
sequence(:application_run_group_name) { |n| "Run Group #{n}" }
|
177
177
|
end
|
178
178
|
|
179
|
-
|
180
|
-
#######
|
181
|
-
|
179
|
+
#############################################
|
180
|
+
####### Run Interval Style ################
|
181
|
+
#############################################
|
182
182
|
|
183
183
|
factory :run_interval_style, class: ::Naf::RunIntervalStyle do
|
184
184
|
name 'at beginning of day'
|
@@ -34,19 +34,61 @@ module Logical::Naf::ConstructionZone
|
|
34
34
|
|
35
35
|
describe '#enqueue_application' do
|
36
36
|
let(:application) { FactoryGirl.create(:application) }
|
37
|
+
let(:prereq) { FactoryGirl.create(:job) }
|
37
38
|
let!(:job) {
|
38
39
|
boss.enqueue_application(application,
|
39
40
|
::Naf::ApplicationRunGroupRestriction.no_limit,
|
40
|
-
application.command
|
41
|
+
application.command,
|
42
|
+
5,
|
43
|
+
1,
|
44
|
+
[::Naf::Affinity.first],
|
45
|
+
[prereq],
|
46
|
+
true)
|
41
47
|
}
|
42
48
|
|
43
|
-
it_should_behave_like 'create one historical job',
|
49
|
+
it_should_behave_like 'create one historical job', 2
|
50
|
+
|
51
|
+
it 'assign run group restriction correctly' do
|
52
|
+
job.application_run_group_restriction_id.should == ::Naf::ApplicationRunGroupRestriction.no_limit.id
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'assign run group name correctly' do
|
56
|
+
job.application_run_group_name.should == application.command
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'assign run group limit correctly' do
|
60
|
+
job.application_run_group_limit.should == 5
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'assign priority correctly' do
|
64
|
+
job.priority.should == 1
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'assign affinities correctly' do
|
68
|
+
job.historical_job_affinity_tabs.map(&:affinity_id).should == [::Naf::Affinity.first.id]
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'assign prerequisites correctly' do
|
72
|
+
job.historical_job_prerequisites.map(&:prerequisite_historical_job_id).should == [prereq.id]
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'assign enqueue_backlogs correctly' do
|
76
|
+
job.application_run_group_name.should == application.command
|
77
|
+
end
|
44
78
|
end
|
45
79
|
|
46
80
|
describe '#enqueue_application_schedule' do
|
47
81
|
let!(:job) { boss.enqueue_application_schedule(FactoryGirl.create(:schedule)) }
|
48
82
|
|
49
83
|
it_should_behave_like 'create one historical job', 1
|
84
|
+
|
85
|
+
it 'create two historical jobs when schedule has a prerequisite' do
|
86
|
+
schedule = FactoryGirl.create(:schedule)
|
87
|
+
FactoryGirl.create(:schedule_prerequisite, application_schedule: schedule)
|
88
|
+
boss.enqueue_application_schedule(schedule)
|
89
|
+
|
90
|
+
::Naf::HistoricalJob.should have(3).records
|
91
|
+
end
|
50
92
|
end
|
51
93
|
|
52
94
|
describe '#enqueue_rails_command' do
|
@@ -82,7 +124,7 @@ module Logical::Naf::ConstructionZone
|
|
82
124
|
|
83
125
|
it 'create two historical jobs when a machine is present' do
|
84
126
|
machine = FactoryGirl.create(:machine)
|
85
|
-
classification = FactoryGirl.create(:
|
127
|
+
classification = FactoryGirl.create(:machine_affinity_classification)
|
86
128
|
FactoryGirl.create(:affinity, id: 5,
|
87
129
|
affinity_name: machine.id.to_s,
|
88
130
|
affinity_classification: classification)
|
@@ -5,6 +5,14 @@ module Naf
|
|
5
5
|
let!(:normal) { FactoryGirl.create(:normal_affinity) }
|
6
6
|
let(:canary) { FactoryGirl.create(:canary_affinity) }
|
7
7
|
let(:perennial) { FactoryGirl.create(:perennial_affinity) }
|
8
|
+
let!(:machine) { FactoryGirl.create(:machine) }
|
9
|
+
let!(:machine_classification) { FactoryGirl.create(:machine_affinity_classification) }
|
10
|
+
let!(:machine_affinity) {
|
11
|
+
FactoryGirl.create(:affinity, id: 5,
|
12
|
+
affinity_classification_id: machine_classification.id,
|
13
|
+
affinity_name: machine.id.to_s)
|
14
|
+
}
|
15
|
+
|
8
16
|
|
9
17
|
# Mass-assignment
|
10
18
|
[:affinity_classification_id,
|
@@ -94,9 +102,7 @@ module Naf
|
|
94
102
|
|
95
103
|
it 'return proper message when pair value (affinity_classification_id, affinity_name) already exists' do
|
96
104
|
normal.affinity_name = FactoryGirl.create(:machine).id.to_s
|
97
|
-
normal.affinity_classification
|
98
|
-
normal.save
|
99
|
-
normal.affinity_classification.save
|
105
|
+
normal.affinity_classification = machine_classification
|
100
106
|
|
101
107
|
normal.validate_affinity_name.should == 'An affinity with the pair value (affinity_classification_id, affinity_name) already exists!'
|
102
108
|
end
|
@@ -110,6 +116,7 @@ module Naf
|
|
110
116
|
before do
|
111
117
|
canary.update_attributes!(selectable: false)
|
112
118
|
perennial.update_attributes!(selectable: false)
|
119
|
+
machine_affinity.update_attributes!(selectable: false)
|
113
120
|
end
|
114
121
|
|
115
122
|
it "return only selectable affinities" do
|
@@ -117,5 +124,37 @@ module Naf
|
|
117
124
|
end
|
118
125
|
end
|
119
126
|
|
127
|
+
describe "#deleted_machine_affinities" do
|
128
|
+
it "return only affinities that are associated with deleted machines" do
|
129
|
+
machine.update_attributes!(deleted: true, enabled: false)
|
130
|
+
::Naf::Affinity.deleted_machine_affinities.should == [machine_affinity]
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
describe "#names_list" do
|
135
|
+
before do
|
136
|
+
canary.update_attributes!(selectable: false)
|
137
|
+
perennial.update_attributes!(selectable: false)
|
138
|
+
end
|
139
|
+
|
140
|
+
it "return affinities not related to machine classification correctly" do
|
141
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['0.0.0.1', 5]]
|
142
|
+
end
|
143
|
+
|
144
|
+
it "return affinities related to machine classification correctly when short name is present" do
|
145
|
+
machine_affinity.update_attributes!(affinity_short_name: 'machine_1')
|
146
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['machine_1', 5]]
|
147
|
+
end
|
148
|
+
|
149
|
+
it "return affinities related to machine classification correctly when affinity_name is used" do
|
150
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['0.0.0.1', 5]]
|
151
|
+
end
|
152
|
+
|
153
|
+
it "return affinities related to machine classification correctly when it is invalid" do
|
154
|
+
machine_affinity.update_attributes!(affinity_name: '100')
|
155
|
+
::Naf::Affinity.names_list.should == [['purpose, normal', 1], ['Bad affinity: machine, 100', 5]]
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
120
159
|
end
|
121
160
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: naf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.11
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-04-
|
13
|
+
date: 2014-04-30 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rails
|
@@ -194,6 +194,9 @@ files:
|
|
194
194
|
- app/assets/images/bg-grad.png
|
195
195
|
- app/assets/images/clock.png
|
196
196
|
- app/assets/images/control_play_blue.png
|
197
|
+
- app/assets/images/diagram_affinities.png
|
198
|
+
- app/assets/images/diagram_runners.png
|
199
|
+
- app/assets/images/diagram_running_system.png
|
197
200
|
- app/assets/images/down_arrow.gif
|
198
201
|
- app/assets/images/job.png
|
199
202
|
- app/assets/images/machine.png
|
@@ -484,7 +487,7 @@ files:
|
|
484
487
|
- spec/factories/naf.rb
|
485
488
|
- spec/helpers/naf/application_helper_spec.rb
|
486
489
|
- spec/models/logical/naf/application_spec.rb
|
487
|
-
- spec/models/logical/naf/construction_zone/
|
490
|
+
- spec/models/logical/naf/construction_zone/boss_spec.rb
|
488
491
|
- spec/models/logical/naf/construction_zone/foreman_spec.rb
|
489
492
|
- spec/models/logical/naf/construction_zone/proletariat_spec.rb
|
490
493
|
- spec/models/logical/naf/construction_zone/work_order_spec.rb
|
@@ -603,7 +606,7 @@ test_files:
|
|
603
606
|
- spec/factories/naf.rb
|
604
607
|
- spec/helpers/naf/application_helper_spec.rb
|
605
608
|
- spec/models/logical/naf/application_spec.rb
|
606
|
-
- spec/models/logical/naf/construction_zone/
|
609
|
+
- spec/models/logical/naf/construction_zone/boss_spec.rb
|
607
610
|
- spec/models/logical/naf/construction_zone/foreman_spec.rb
|
608
611
|
- spec/models/logical/naf/construction_zone/proletariat_spec.rb
|
609
612
|
- spec/models/logical/naf/construction_zone/work_order_spec.rb
|