vayacondios-server 0.2.11 → 0.3.0

Files changed (142)
  1. data/.gitignore +3 -1
  2. data/.travis.yml +2 -0
  3. data/Gemfile +15 -9
  4. data/LICENSE.md +2 -6
  5. data/Procfile +1 -1
  6. data/README.md +656 -111
  7. data/Rakefile +89 -6
  8. data/bin/vcd +10 -0
  9. data/bin/vcd-server +8 -0
  10. data/config/database.yml +6 -0
  11. data/config/spec.example.yml +18 -0
  12. data/config/vayacondios.example.yml +15 -0
  13. data/config/vcd-server.rb +37 -0
  14. data/examples/configuration.rb +56 -0
  15. data/examples/event_stream.rb +19 -0
  16. data/examples/simple.rb +61 -0
  17. data/features/event.feature +319 -0
  18. data/features/events.feature +208 -0
  19. data/features/stash.feature +840 -0
  20. data/features/stashes.feature +492 -0
  21. data/features/step_definitions/stash_steps.rb +113 -0
  22. data/features/stream.feature +30 -0
  23. data/features/support/em.rb +14 -0
  24. data/features/support/env.rb +13 -0
  25. data/lib/vayacondios/configuration.rb +63 -0
  26. data/lib/vayacondios/server/api.rb +126 -0
  27. data/lib/vayacondios/server/api_options.rb +56 -0
  28. data/lib/vayacondios/server/configuration.rb +23 -0
  29. data/lib/vayacondios/server/driver.rb +71 -0
  30. data/lib/vayacondios/server/drivers/mongo.rb +126 -0
  31. data/lib/vayacondios/server/handlers/document_handler.rb +81 -0
  32. data/lib/vayacondios/server/handlers/event_handler.rb +31 -26
  33. data/lib/vayacondios/server/handlers/events_handler.rb +31 -0
  34. data/lib/vayacondios/server/handlers/stash_handler.rb +69 -0
  35. data/lib/vayacondios/server/handlers/stashes_handler.rb +49 -0
  36. data/lib/vayacondios/server/handlers/stream_handler.rb +39 -0
  37. data/lib/vayacondios/server/models/document.rb +87 -0
  38. data/lib/vayacondios/server/models/event.rb +198 -0
  39. data/lib/vayacondios/server/models/stash.rb +100 -0
  40. data/lib/vayacondios/server.rb +35 -0
  41. data/lib/vayacondios-server.rb +19 -13
  42. data/lib/vayacondios.rb +22 -0
  43. data/pom.xml +124 -4
  44. data/spec/configuration_spec.rb +41 -0
  45. data/spec/server/api_options_spec.rb +32 -0
  46. data/spec/server/api_spec.rb +279 -0
  47. data/spec/server/configuration_spec.rb +27 -0
  48. data/spec/server/drivers/mongo_spec.rb +107 -0
  49. data/spec/server/handlers/event_handler_spec.rb +62 -0
  50. data/spec/server/handlers/events_handler_spec.rb +51 -0
  51. data/spec/server/handlers/stash_handler_spec.rb +68 -0
  52. data/spec/server/handlers/stashes_handler_spec.rb +50 -0
  53. data/spec/server/handlers/stream_handler_spec.rb +5 -0
  54. data/spec/server/models/document_spec.rb +9 -0
  55. data/spec/server/models/event_spec.rb +185 -0
  56. data/spec/server/models/stash_spec.rb +95 -0
  57. data/spec/spec_helper.rb +23 -3
  58. data/spec/support/database_helper.rb +42 -0
  59. data/spec/support/log_helper.rb +19 -0
  60. data/spec/support/shared_context_for_events.rb +22 -0
  61. data/spec/support/shared_context_for_stashes.rb +24 -0
  62. data/spec/support/shared_examples_for_handlers.rb +32 -0
  63. data/src/main/java/com/infochimps/vayacondios/BaseClient.java +342 -0
  64. data/src/main/java/com/infochimps/vayacondios/HTTPClient.java +426 -0
  65. data/src/main/java/com/infochimps/vayacondios/VayacondiosClient.java +487 -65
  66. data/src/main/java/com/infochimps/vayacondios/test/IntegrationTest.java +3 -0
  67. data/src/test/java/com/infochimps/vayacondios/BaseClientTest.java +50 -0
  68. data/src/test/java/com/infochimps/vayacondios/HTTPClientIT.java +267 -0
  69. data/vayacondios-server.gemspec +9 -9
  70. metadata +127 -122
  71. checksums.yaml +0 -15
  72. data/.rspec +0 -2
  73. data/.yardopts +0 -10
  74. data/Guardfile +0 -41
  75. data/app/http_shim.rb +0 -71
  76. data/bin/vcd.sh +0 -27
  77. data/config/http_shim.rb +0 -43
  78. data/config/vayacondios.example.yaml +0 -7
  79. data/config/vayacondios.yaml +0 -7
  80. data/examples/java/ItemSetTest.java +0 -76
  81. data/lib/tasks/publish.rake +0 -23
  82. data/lib/tasks/spec.rake +0 -11
  83. data/lib/tasks/yard.rake +0 -2
  84. data/lib/vayacondios/client/config.rb +0 -7
  85. data/lib/vayacondios/client/configliere.rb +0 -38
  86. data/lib/vayacondios/client/cube_client.rb +0 -39
  87. data/lib/vayacondios/client/http_client.rb +0 -49
  88. data/lib/vayacondios/client/itemset.rb +0 -130
  89. data/lib/vayacondios/client/legacy_switch.rb +0 -43
  90. data/lib/vayacondios/client/notifier.rb +0 -123
  91. data/lib/vayacondios/client/zabbix_client.rb +0 -148
  92. data/lib/vayacondios/legacy_switch.rb +0 -43
  93. data/lib/vayacondios/server/errors/bad_request.rb +0 -6
  94. data/lib/vayacondios/server/errors/not_found.rb +0 -6
  95. data/lib/vayacondios/server/handlers/config_handler.rb +0 -32
  96. data/lib/vayacondios/server/handlers/itemset_handler.rb +0 -60
  97. data/lib/vayacondios/server/legacy_switch.rb +0 -43
  98. data/lib/vayacondios/server/model/config_document.rb +0 -89
  99. data/lib/vayacondios/server/model/document.rb +0 -25
  100. data/lib/vayacondios/server/model/event_document.rb +0 -94
  101. data/lib/vayacondios/server/model/itemset_document.rb +0 -126
  102. data/lib/vayacondios/server/rack/extract_methods.rb +0 -35
  103. data/lib/vayacondios/server/rack/jsonize.rb +0 -43
  104. data/lib/vayacondios/server/rack/params.rb +0 -50
  105. data/lib/vayacondios/server/rack/path.rb +0 -23
  106. data/lib/vayacondios/server/rack/path_validation.rb +0 -22
  107. data/lib/vayacondios/version.rb +0 -3
  108. data/lib/vayacondios-client.rb +0 -22
  109. data/scripts/hadoop_monitor/configurable.rb +0 -66
  110. data/scripts/hadoop_monitor/hadoop_attempt_scraper.rb +0 -45
  111. data/scripts/hadoop_monitor/hadoop_client.rb +0 -273
  112. data/scripts/hadoop_monitor/hadoop_monitor.rb +0 -101
  113. data/scripts/hadoop_monitor/hadoopable.rb +0 -65
  114. data/scripts/hadoop_monitor/machine_monitor.rb +0 -115
  115. data/scripts/s3_cataloger/buckets +0 -33
  116. data/scripts/s3_cataloger/foreach_bucket +0 -88
  117. data/scripts/s3_cataloger/parse_ls.py +0 -391
  118. data/spec/client/itemset_legacy_spec.rb +0 -55
  119. data/spec/client/itemset_spec.rb +0 -60
  120. data/spec/client/notifier_spec.rb +0 -120
  121. data/spec/server/config_spec.rb +0 -113
  122. data/spec/server/event_spec.rb +0 -103
  123. data/spec/server/itemset_legacy_spec.rb +0 -320
  124. data/spec/server/itemset_spec.rb +0 -317
  125. data/spec/server/rack/extract_methods_spec.rb +0 -60
  126. data/spec/server/rack/path_spec.rb +0 -36
  127. data/spec/server/rack/path_validation_spec.rb +0 -22
  128. data/spec/server/server_spec.rb +0 -20
  129. data/spec/support/mongo_cleaner.rb +0 -32
  130. data/src/main/java/ItemSetTest.java +0 -76
  131. data/src/main/java/com/infochimps/util/CurrentClass.java +0 -26
  132. data/src/main/java/com/infochimps/util/DebugUtil.java +0 -38
  133. data/src/main/java/com/infochimps/util/HttpHelper.java +0 -181
  134. data/src/main/java/com/infochimps/vayacondios/ItemSets.java +0 -373
  135. data/src/main/java/com/infochimps/vayacondios/LinkToVCD.java +0 -18
  136. data/src/main/java/com/infochimps/vayacondios/MemoryVCDShim.java +0 -84
  137. data/src/main/java/com/infochimps/vayacondios/Organization.java +0 -62
  138. data/src/main/java/com/infochimps/vayacondios/PathBuilder.java +0 -13
  139. data/src/main/java/com/infochimps/vayacondios/StandardVCDLink.java +0 -218
  140. data/src/main/java/com/infochimps/vayacondios/VCDIntegrationTest.java +0 -108
  141. data/src/test/java/com/infochimps/vayacondios/TestVayacondiosInMemory.java +0 -78
  142. data/vayacondios-client.gemspec +0 -25
data/lib/vayacondios/server/rack/path_validation.rb
@@ -1,22 +0,0 @@
- class Vayacondios
-   module Rack
-     class PathValidation
-       include Goliath::Rack::AsyncMiddleware
-
-       def initialize(app, opts = {})
-         @app = app ; @opts = opts
-       end
-
-       def call(env)
-         return [400, {}, MultiJson.dump({ error: "Bad Request. Format path is <host>/v1/<org>/event/<topic>" })] unless valid_paths? env[:vayacondios_path]
-         @app.call(env)
-       end
-
-       def valid_paths?(path)
-         # use @opts for validation later
-         path.nil? ? false : true
-       end
-     end
-   end
- end
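For context, the removed middleware was ordinary Goliath::Rack plumbing. Below is a minimal sketch of how it would have been stacked in an API class; `HttpShim` is a stand-in name, and `Vayacondios::Rack::Path` (also deleted in this release) is the middleware that populated `env[:vayacondios_path]` before validation ran:

```ruby
require 'goliath'
require 'multi_json'

# Sketch only: mirrors the middleware ordering implied by the deleted files,
# not the exact contents of the removed app/http_shim.rb.
class HttpShim < Goliath::API
  use Vayacondios::Rack::Path            # parse the request path into env[:vayacondios_path]
  use Vayacondios::Rack::PathValidation  # respond 400 when the path is missing

  def response(env)
    [200, {}, MultiJson.dump({ ok: true })]
  end
end
```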
data/lib/vayacondios/version.rb
@@ -1,3 +0,0 @@
- class Vayacondios
-   VERSION = '0.2.11'
- end
data/lib/vayacondios-client.rb
@@ -1,22 +0,0 @@
- require 'configliere'
- require 'multi_json'
- require 'net/http'
-
- require 'gorillib/builder'
- require 'gorillib/configurable'
- require 'gorillib/enumerable/sum'
- require 'gorillib/exception/raisers'
- require 'gorillib/hash/deep_compact'
- require 'gorillib/hash/deep_merge'
- require 'gorillib/hash/keys'
- require 'gorillib/logger/log'
- require 'gorillib/metaprogramming/class_attribute'
- require 'gorillib/object/blank'
- require 'gorillib/string/constantize'
- require 'gorillib/string/inflections'
-
- require 'vayacondios/client/http_client'
- require 'vayacondios/client/cube_client'
- require 'vayacondios/client/zabbix_client'
- require 'vayacondios/client/notifier'
- require 'vayacondios/client/configliere'
data/scripts/hadoop_monitor/configurable.rb
@@ -1,66 +0,0 @@
- require 'configliere'
- require 'logger'
-
- class Vayacondios
-
-   module Configurable
-
-     #
-     # Declare a name CONST_NAME = :const_name
-     #
-     def self.declare_name symbol
-       const_set symbol.to_s.upcase.to_sym, symbol
-     end
-
-     declare_name :cluster_busy
-     declare_name :cluster_quiet
-     declare_name :event
-     declare_name :time
-
-     attr_reader :logger
-
-     def settings
-       init_settings
-       return @settings
-     end
-
-     def init_settings
-       return if defined? @settings
-
-       @settings = Configliere::Param.new
-       @settings.use :env_var, :config_file, :commandline
-
-       @settings.define(:config_file,
-                        description: "Config file location")
-       @settings.define(:sleep_seconds,
-                        default: 5,
-                        description: "Time to sleep in main loops")
-       @settings.define(:log_level,
-                        default: "info",
-                        description: "Log level. See standard Logger class")
-       @settings.define(:mongo_db,
-                        default: 'job_info',
-                        description: "Mongo database to dump hadoop job information into")
-       @settings.define(:mongo_ip,
-                        default: nil,
-                        description: "IP address of Hadoop monitor node")
-       @settings.define(:mongo_collection_size,
-                        default: 10 * (1 << 20),
-                        description: "Size (in bytes) of Mongo job events collection")
-
-       @settings.resolve!
-
-       if @settings.config_file
-         @settings.read(@settings.config_file)
-         @settings.resolve!
-       end
-
-       @logger = Logger.new(STDERR)
-       @logger.level = Logger.const_get(@settings.log_level.upcase.to_sym)
-
-       @logger.info "Settings: #{@settings}"
-
-       @settings
-     end
-   end
- end
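The mixin above resolved configuration from environment variables, an optional config file, and command-line flags (via Configliere), then exposed `settings` and `logger` to the including class. A minimal sketch of a consumer, patterned on the also-removed HadoopMonitor (`JobWatcher` is a hypothetical name):

```ruby
class JobWatcher
  include Vayacondios::Configurable

  def initialize
    init_settings # builds @settings and @logger; resolves env vars, config file, and flags
  end

  def tick
    logger.info "polling #{settings.mongo_db} every #{settings.sleep_seconds}s"
  end
end

# e.g. jruby job_watcher.rb --mongo_ip=10.0.0.5 --log_level=debug
JobWatcher.new.tick
```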
data/scripts/hadoop_monitor/hadoop_attempt_scraper.rb
@@ -1,45 +0,0 @@
- require 'open-uri'
- require 'nibbler'
- require 'socket'
-
- class HadoopAttemptScraper < Nibbler
-   attr_accessor :task_id
-
-   def self.scrape_task(task_id)
-     task_id = task_id.to_s
-
-     url = "http://#{Socket.gethostname}:50030/taskdetails.jsp?tipid=#{task_id}"
-     scrape = parse(open(url))
-     scrape.task_id = task_id
-
-     scrape
-   end
-
-   elements 'table.jobtasks tbody > tr' => :attempts do
-     element 'td:nth-child(1)' => 'attempt_id'
-     element 'td:nth-child(2) a/@href' => 'machine'
-     element 'td:nth-child(3)' => 'status'
-     element 'td:nth-child(4)' => 'progress'
-     element 'td:nth-child(5)' => 'start_time'
-     element 'td:nth-child(6)' => 'finish_time'
-     element 'td:nth-child(7)' => 'errors'
-   end
-
-   def to_attempts
-     attempts.map do |attempt|
-       start_time = Time.parse(attempt.start_time) rescue nil
-       finish_time = attempt.finish_time.length > 0 ? Time.parse(attempt.finish_time) : nil
-       {
-         _id: attempt.attempt_id.to_s,
-         task_id: task_id,
-         host: attempt.machine.to_s.gsub(/^http:\/\//, '').gsub(/:[0-9]+$/, ''),
-         status: attempt.status,
-         progress: attempt.progress.to_f / 100.0,
-         start_time: start_time,
-         finish_time: finish_time,
-         duration: start_time ? (finish_time || Time.now) - start_time : nil,
-         errors: attempt.errors
-       }
-     end
-   end
- end
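A usage sketch for the scraper above, assuming a JobTracker web UI reachable on port 50030 (the task id shown is hypothetical). Note that `to_attempts` calls `Time.parse`, which needs `require 'time'`; the original file omitted it:

```ruby
require 'time' # Time.parse is used by to_attempts but was not required above

scrape = HadoopAttemptScraper.scrape_task('task_201301011200_0042_m_000000')
scrape.to_attempts.each do |attempt|
  pct = (attempt[:progress] * 100).round
  puts "#{attempt[:_id]} on #{attempt[:host]}: #{attempt[:status]} (#{pct}%)"
end
```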
data/scripts/hadoop_monitor/hadoop_client.rb
@@ -1,273 +0,0 @@
- require_relative 'configurable'
- require_relative 'hadoopable'
- require_relative 'hadoop_attempt_scraper'
-
- require 'json'
- require 'optparse'
- require 'ostruct'
- require 'logger'
- require 'pp'
- require 'gorillib/string/inflections'
- require 'swineherd-fs'
-
- class Vayacondios
-
-   class HadoopClient
-
-     include Configurable
-     include Hadoopable
-
-     RUNNING = JobStatus::RUNNING
-
-     def initialize
-       init_settings
-       logger.info "Connecting to job tracker."
-       @job_client = JobClient.new JobConf.new(get_hadoop_conf)
-     end
-
-     #
-     # (Equality doesn't work for jobs, so - will not work as intended
-     # on arrays of jobs.)
-     #
-     def subtract jobs_array1, jobs_array2
-       jobs_array1.reject{|j| jobs_array2.map(&:job_id).map(&:to_s).index j.job_id.to_s}
-     end
-
-     #
-     # Returns the jobs with the specified state. States are specified
-     # by constants in this class.
-     #
-     def jobs_with_state state
-       jobs_by_state[state] || []
-     end
-
-     #
-     # Returns the properties of the specified job as a hash.
-     #
-     def job_properties job
-       host_port = job.get_tracking_url[/^(http:\/\/)?[^\/]*/]
-       job_id = job.get_id.to_s
-       conf_uri = "#{host_port}/logs/#{job_id}_conf.xml"
-
-       parse_properties(open conf_uri)
-     end
-
-     #
-     # Returns the stats for the current job as a hash.
-     #
-     def job_stats job, finish_time
-       parse_job job.get_id, finish_time
-     end
-
-     private
-
-     #
-     # Returns a hash JobStatus::<SOME_STATE> => <array of jobs>
-     #
-     def jobs_by_state
-       job_statuses_by_state = @job_client.get_all_jobs.group_by(&:get_run_state)
-       Hash[job_statuses_by_state.map{|state, job_statuses| [state, jobs_from_statuses(job_statuses)]}]
-     end
-
-     #
-     # Some hadoop stuff returns JobStatus objects. This converts them
-     # to RunningJob objects.
-     #
-     def jobs_from_statuses job_statuses
-       job_statuses.map{|job_status| @job_client.get_job job_status.get_job_id}
-     end
-
-     #
-     # Takes an org.apache.hadoop.mapred.RunningJob and returns a hash
-     # object that represents it.
-     #
-     def parse_job job_id, finish_time
-       job = @job_client.get_job job_id
-       job_status = @job_client.get_all_jobs.select{|j| j.get_job_id.to_s == job_id.to_s}.first
-       finished_status = [:FAILED, :KILLED, :COMPLETE]
-       failed_status = [:FAILED]
-
-       # not sure what is what. I'm guessing
-       # JobStatus.getStartTime corresponds to the
-       # launch time in the logs
-       start_time = Time.at(job_status.get_start_time / 1000)
-       reduce_progress = job.reduce_progress
-       map_progress = job.map_progress
-       run_duration = (finish_time || Time.now) - start_time
-
-       map_eta = map_progress && map_progress > 0.0 ? (start_time + (run_duration / map_progress)) : nil
-       reduce_eta = reduce_progress && reduce_progress > 0.0 ? (start_time + (run_duration / reduce_progress)) : nil
-
-       job_data = {
-         _id: job_id.to_s,
-         name: job.get_job_name.to_s,
-
-         start_time: start_time,
-         finish_time: finish_time,
-
-         duration: run_duration,
-
-         map_eta: map_eta,
-         reduce_eta: reduce_eta,
-         eta: reduce_eta,
-
-         status: case job_status.get_run_state
-                 when JobStatus::FAILED then :FAILED
-                 when JobStatus::KILLED then :KILLED
-                 when JobStatus::PREP then :PREP
-                 when JobStatus::RUNNING then :RUNNING
-                 when JobStatus::SUCCEEDED then :SUCCEEDED
-                 end,
-
-         finished_maps: num_tasks(job_id, :map, finished_status),
-         finished_reduces: num_tasks(job_id, :reduce, finished_status),
-         failed_maps: num_tasks(job_id, :map, failed_status),
-         failed_reduces: num_tasks(job_id, :reduce, failed_status),
-
-         counters: parse_counters(job.get_counters)
-       }
-
-       job_event = {
-         t: Time.now,
-         d: {
-           job_id: job.job_id,
-           cleanup_progress: job.cleanup_progress,
-           map_progress: job.map_progress,
-           reduce_progress: job.reduce_progress,
-           setup_progress: job.setup_progress,
-         }
-       }
-
-       setup_task_data = @job_client.get_setup_task_reports job_id
-       map_task_data = @job_client.get_map_task_reports job_id
-       reduce_task_data = @job_client.get_reduce_task_reports job_id
-       cleanup_task_data = @job_client.get_cleanup_task_reports job_id
-
-       setup_reports = setup_task_data.map{|task| parse_task task, "SETUP", job_id }
-       setup_event_reports = setup_task_data.map{|task| parse_task_progress task, "SETUP" }
-
-       map_reports = map_task_data.map{|task| parse_task task, "MAP", job_id }
-       map_event_reports = map_task_data.map{|task| parse_task_progress task, "MAP" }
-
-       reduce_reports = reduce_task_data.map{|task| parse_task task, "REDUCE", job_id }
-       reduce_event_reports = reduce_task_data.map{|task| parse_task_progress task, "REDUCE" }
-
-       cleanup_reports = cleanup_task_data.map{|task| parse_task task, "CLEANUP", job_id }
-       cleanup_event_reports = cleanup_task_data.map{|task| parse_task_progress task, "CLEANUP" }
-
-       tasks = setup_reports + map_reports + reduce_reports + cleanup_reports
-       task_events = setup_event_reports + map_event_reports + reduce_event_reports + cleanup_event_reports
-
-       attempt_reports = tasks.map{|task| HadoopAttemptScraper.scrape_task(task[:_id]).to_attempts }.flatten
-
-       {
-         job: job_data,
-         job_event: job_event,
-         tasks: tasks,
-         task_events: task_events,
-         attempts: attempt_reports
-       }
-     end
-
-     #
-     # Return a hash containing a name => value hash representing the
-     # config for a hadoop job.
-     #
-     def parse_properties conf
-       properties = {}
-       conf.read.scan(/[^\n]*\n/) do |line,|
-         m = /<name>([^<]+)<\/name><value>([^<]+)<\/value>/.match line
-         if m and m[1] !~ /fs\.s3n?\.awsSecretAccessKey/ then
-           properties[parse_key m[1]] = parse_atom m[2]
-         end
-       end
-       properties
-     end
-
-     #
-     # Takes an org.apache.hadoop.mapred.TaskReport and returns a Hash
-     # object that represents it.
-     #
-     def parse_task task_report, task_type, parent_job_id
-       start_time = task_report.get_start_time > 0 ? Time.at(task_report.get_start_time / 1000) : nil
-       finish_time = task_report.get_finish_time > 0 ? Time.at(task_report.get_finish_time / 1000) : nil
-
-       {
-         _id: task_report.get_task_id.to_s,
-         job_id: parent_job_id.to_s,
-         type: task_type,
-         status: task_report.get_current_status.to_s,
-         start_time: start_time,
-         finish_time: finish_time,
-         duration: start_time ? (finish_time || Time.now) - start_time : nil,
-         counters: parse_counters(task_report.get_counters),
-         diagnostics: task_report.get_diagnostics.map(&:to_s),
-         successful_attempt_id: task_report.get_successful_task_attempt.to_s
-       }
-     end
-
-     def parse_task_progress task_report, task_type
-       {
-         t: Time.now,
-         d: {
-           task_id: task_report.get_task_id.to_s,
-           progress: task_report.get_progress,
-           running_attempt_ids: task_report.get_running_task_attempts.map(&:to_s)
-         }
-       }
-     end
-
-     #
-     # Takes a class of type org.apache.hadoop.mapred.Counters and
-     # returns a Hash object that represents this counter.
-     #
-     def parse_counters counters
-       Hash[counters.map do |group|
-         [parse_key(group.get_name), Hash[group.map do |counter|
-           [parse_key(counter.get_name), counter.get_counter]
-         end]]
-       end]
-     end
-
-     #
-     # Parse a key in a log entry. Log entries consist of a type, which I
-     # consider a key, and a list of key=value pairs.
-     #
-     def parse_key key
-       return (parse_atom key).underscore.gsub ".", "_"
-     end
-
-     #
-     # Parse a value in a Hadoop log.
-     #
-     def parse_atom a
-       if /[0-9][ \r\t\n]*\/[ \r\t\n]*[0-9]+/.match a
-         # "0/6" -> [0,6]
-         return a.split("/").collect{|s| s.to_i}
-       elsif /^[0-9,]*$/.match a
-         # "224" -> 224
-         return a.gsub(',', '').to_i
-       else
-         # \. -> .
-         return a.gsub(/([^\\])\\(.)/, '\1\2')
-       end
-     end
-
-     #
-     # Returns the number of tasks of the specified TIPStatus from the
-     # specified job_client of the specified type (map or reduce)
-     #
-     def num_tasks job_id, map_or_reduce, statuses
-       method_name = "get_#{map_or_reduce}_task_reports".to_sym
-       @job_client.send(method_name, job_id).select do |report|
-         tip_statuses = statuses.map do |status|
-           TIPStatus.const_get status
-         end
-         tip_statuses.index report.get_current_status
-       end.size
-     end
-   end
- end
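The client above boiled down to a small polling surface: list jobs by state, diff job arrays, and flatten one job into stats hashes. A one-shot poll might have looked like this (a sketch; assumes JRuby with the Hadoop jars on the classpath and a reachable JobTracker):

```ruby
client  = Vayacondios::HadoopClient.new
running = client.jobs_with_state(Vayacondios::HadoopClient::RUNNING)

running.each do |job|
  stats = client.job_stats(job, nil) # nil finish_time: durations run up to Time.now
  puts "#{stats[:job][:_id]}: #{stats[:job][:status]}, eta #{stats[:job][:eta]}"
end
```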
data/scripts/hadoop_monitor/hadoop_monitor.rb
@@ -1,101 +0,0 @@
- #!/usr/bin/env jruby19
-
- require_relative 'hadoop_client'
- require_relative 'configurable'
- require 'java'
- require 'mongo'
- require 'scanf'
- require 'gorillib/hash/slice'
- require 'thread'
- require 'open-uri'
- require 'json'
-
- class Vayacondios
-
-   class HadoopMonitor
-     def initialize
-       init_settings
-
-       @hadoop = HadoopClient.new
-
-       @monitored_jobs = []
-
-       logger.debug "Creating mongo collections."
-       @conn = Mongo::Connection.new settings.mongo_ip
-       @db = @conn[settings.mongo_db]
-
-       capped_collection_opts = {
-         :capped => true,
-         :size => settings.mongo_collection_size
-       }
-
-       @collections = {
-         jobs: @db.create_collection('jobs'),
-         tasks: @db.create_collection('job_tasks'),
-         attempts: @db.create_collection('job_task_attempts'),
-
-         job_events: @db.create_collection('job_events', capped_collection_opts),
-         task_events: @db.create_collection('job_task_events', capped_collection_opts),
-       }
-     end
-
-     def run
-       loop do
-         logger.debug "In main event loop."
-
-         running_jobs = @hadoop.jobs_with_state HadoopClient::RUNNING
-         started_jobs = @hadoop.subtract(running_jobs, @monitored_jobs)
-         finished_jobs = @hadoop.subtract(@monitored_jobs, running_jobs)
-
-         finished_jobs.each do |job|
-           logger.debug "#{job.get_id.to_s} is complete."
-           update_job_stats job, Time.now
-         end
-
-         running_jobs.each{|job| update_job_stats job, nil, @hadoop.subtract([job], started_jobs).empty? }
-
-         @monitored_jobs = running_jobs
-
-         sleep settings.sleep_seconds
-       end
-     end
-
-     private
-
-     include Configurable
-
-     def update_job_stats job, finish_time = nil, include_properties = false
-       stats = @hadoop.job_stats(job, finish_time)
-
-       if include_properties
-         stats[:job][:properties] = @hadoop.job_properties job
-       end
-
-       logger.debug "upserting job #{JSON.generate stats[:job]}"
-       @collections[:jobs].update({_id: stats[:job][:_id]}, stats[:job], upsert: true)
-
-       logger.debug "inserting job_event #{JSON.generate stats[:job_event]}"
-       @collections[:job_events].insert(stats[:job_event])
-
-       logger.debug "upserting tasks #{JSON.generate stats[:tasks]}"
-       stats[:tasks].each do |task|
-         @collections[:tasks].update({_id: task[:_id]}, task, upsert: true)
-       end
-
-       logger.debug "inserting task_events #{JSON.generate stats[:task_events]}"
-       stats[:task_events].each do |task_event|
-         @collections[:task_events].insert(task_event)
-       end
-
-       logger.debug "upserting attempts #{JSON.generate stats[:attempts]}"
-       stats[:attempts].each do |attempt|
-         @collections[:attempts].update({_id: attempt[:_id]}, attempt, upsert: true)
-       end
-     end
-   end
- end
-
- Vayacondios::HadoopMonitor.new.run
data/scripts/hadoop_monitor/hadoopable.rb
@@ -1,65 +0,0 @@
- require 'stringio'
-
- class Vayacondios
-
-   module Hadoopable
-
-     include Configurable
-
-     #--------------------------------------------------------------------------------
-     # Initialize jruby and tell it about hadoop.
-     #--------------------------------------------------------------------------------
-
-     begin
-       require 'java'
-     rescue LoadError => e
-       raise "\nJava not found. Are you sure you're running with JRuby?\n#{e.message}"
-     end
-
-     hadoop_home = ENV['HADOOP_HOME'] || '/usr/lib/hadoop'
-
-     raise "\nHadoop installation not found. Try setting $HADOOP_HOME\n" unless (hadoop_home and (File.exist? hadoop_home))
-
-     $CLASSPATH << File.join(File.join(hadoop_home, 'conf') || ENV['HADOOP_CONF_DIR'],
-                             '') # add trailing slash
-
-     Dir["#{hadoop_home}/{hadoop*.jar,lib/*.jar}"].each{|jar| require jar}
-
-     include_class org.apache.hadoop.mapred.JobConf
-     include_class org.apache.hadoop.mapred.JobClient
-     include_class org.apache.hadoop.mapred.JobStatus
-     include_class org.apache.hadoop.mapred.TIPStatus
-     include_class org.apache.hadoop.conf.Configuration
-     #--------------------------------------------------------------------------------
-
-     def get_hadoop_conf
-       logger.debug "Getting hadoop configuration"
-
-       stderr, $stderr = $stderr, StringIO.new
-
-       conf = Configuration.new
-
-       # per-site defaults
-       %w[capacity-scheduler.xml core-site.xml hadoop-policy.xml hadoop-site.xml hdfs-site.xml mapred-site.xml].each do |conf_file|
-         conf.addResource conf_file
-       end
-
-       conf.reload_configuration
-
-       # per-user overrides
-       if Swineherd.config[:aws]
-         conf.set("fs.s3.awsAccessKeyId", Swineherd.config[:aws][:access_key])
-         conf.set("fs.s3.awsSecretAccessKey", Swineherd.config[:aws][:secret_key])
-
-         conf.set("fs.s3n.awsAccessKeyId", Swineherd.config[:aws][:access_key])
-         conf.set("fs.s3n.awsSecretAccessKey", Swineherd.config[:aws][:secret_key])
-       end
-
-       return conf
-     ensure
-       stderr_lines = $stderr.string.split("\n")
-       $stderr = stderr
-       stderr_lines.each{|line| logger.debug line}
-     end
-   end
- end
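Hadoopable ran its JRuby and classpath bootstrapping in the module body, so the Hadoop jars were loaded as soon as the file was required. A consumer only needed the two mixins, as HadoopClient did; `ConfDumper` below is a hypothetical illustration:

```ruby
class ConfDumper
  include Vayacondios::Configurable
  include Vayacondios::Hadoopable # jars and include_class calls already ran at load time

  def initialize
    init_settings # provides the logger that get_hadoop_conf writes to
  end

  def dump
    conf = get_hadoop_conf
    puts conf.get('mapred.job.tracker') # e.g. "tracker-host:8021"
  end
end
```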