vayacondios-server 0.2.11 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. data/.gitignore +3 -1
  2. data/.travis.yml +2 -0
  3. data/Gemfile +15 -9
  4. data/LICENSE.md +2 -6
  5. data/Procfile +1 -1
  6. data/README.md +656 -111
  7. data/Rakefile +89 -6
  8. data/bin/vcd +10 -0
  9. data/bin/vcd-server +8 -0
  10. data/config/database.yml +6 -0
  11. data/config/spec.example.yml +18 -0
  12. data/config/vayacondios.example.yml +15 -0
  13. data/config/vcd-server.rb +37 -0
  14. data/examples/configuration.rb +56 -0
  15. data/examples/event_stream.rb +19 -0
  16. data/examples/simple.rb +61 -0
  17. data/features/event.feature +319 -0
  18. data/features/events.feature +208 -0
  19. data/features/stash.feature +840 -0
  20. data/features/stashes.feature +492 -0
  21. data/features/step_definitions/stash_steps.rb +113 -0
  22. data/features/stream.feature +30 -0
  23. data/features/support/em.rb +14 -0
  24. data/features/support/env.rb +13 -0
  25. data/lib/vayacondios/configuration.rb +63 -0
  26. data/lib/vayacondios/server/api.rb +126 -0
  27. data/lib/vayacondios/server/api_options.rb +56 -0
  28. data/lib/vayacondios/server/configuration.rb +23 -0
  29. data/lib/vayacondios/server/driver.rb +71 -0
  30. data/lib/vayacondios/server/drivers/mongo.rb +126 -0
  31. data/lib/vayacondios/server/handlers/document_handler.rb +81 -0
  32. data/lib/vayacondios/server/handlers/event_handler.rb +31 -26
  33. data/lib/vayacondios/server/handlers/events_handler.rb +31 -0
  34. data/lib/vayacondios/server/handlers/stash_handler.rb +69 -0
  35. data/lib/vayacondios/server/handlers/stashes_handler.rb +49 -0
  36. data/lib/vayacondios/server/handlers/stream_handler.rb +39 -0
  37. data/lib/vayacondios/server/models/document.rb +87 -0
  38. data/lib/vayacondios/server/models/event.rb +198 -0
  39. data/lib/vayacondios/server/models/stash.rb +100 -0
  40. data/lib/vayacondios/server.rb +35 -0
  41. data/lib/vayacondios-server.rb +19 -13
  42. data/lib/vayacondios.rb +22 -0
  43. data/pom.xml +124 -4
  44. data/spec/configuration_spec.rb +41 -0
  45. data/spec/server/api_options_spec.rb +32 -0
  46. data/spec/server/api_spec.rb +279 -0
  47. data/spec/server/configuration_spec.rb +27 -0
  48. data/spec/server/drivers/mongo_spec.rb +107 -0
  49. data/spec/server/handlers/event_handler_spec.rb +62 -0
  50. data/spec/server/handlers/events_handler_spec.rb +51 -0
  51. data/spec/server/handlers/stash_handler_spec.rb +68 -0
  52. data/spec/server/handlers/stashes_handler_spec.rb +50 -0
  53. data/spec/server/handlers/stream_handler_spec.rb +5 -0
  54. data/spec/server/models/document_spec.rb +9 -0
  55. data/spec/server/models/event_spec.rb +185 -0
  56. data/spec/server/models/stash_spec.rb +95 -0
  57. data/spec/spec_helper.rb +23 -3
  58. data/spec/support/database_helper.rb +42 -0
  59. data/spec/support/log_helper.rb +19 -0
  60. data/spec/support/shared_context_for_events.rb +22 -0
  61. data/spec/support/shared_context_for_stashes.rb +24 -0
  62. data/spec/support/shared_examples_for_handlers.rb +32 -0
  63. data/src/main/java/com/infochimps/vayacondios/BaseClient.java +342 -0
  64. data/src/main/java/com/infochimps/vayacondios/HTTPClient.java +426 -0
  65. data/src/main/java/com/infochimps/vayacondios/VayacondiosClient.java +487 -65
  66. data/src/main/java/com/infochimps/vayacondios/test/IntegrationTest.java +3 -0
  67. data/src/test/java/com/infochimps/vayacondios/BaseClientTest.java +50 -0
  68. data/src/test/java/com/infochimps/vayacondios/HTTPClientIT.java +267 -0
  69. data/vayacondios-server.gemspec +9 -9
  70. metadata +127 -122
  71. checksums.yaml +0 -15
  72. data/.rspec +0 -2
  73. data/.yardopts +0 -10
  74. data/Guardfile +0 -41
  75. data/app/http_shim.rb +0 -71
  76. data/bin/vcd.sh +0 -27
  77. data/config/http_shim.rb +0 -43
  78. data/config/vayacondios.example.yaml +0 -7
  79. data/config/vayacondios.yaml +0 -7
  80. data/examples/java/ItemSetTest.java +0 -76
  81. data/lib/tasks/publish.rake +0 -23
  82. data/lib/tasks/spec.rake +0 -11
  83. data/lib/tasks/yard.rake +0 -2
  84. data/lib/vayacondios/client/config.rb +0 -7
  85. data/lib/vayacondios/client/configliere.rb +0 -38
  86. data/lib/vayacondios/client/cube_client.rb +0 -39
  87. data/lib/vayacondios/client/http_client.rb +0 -49
  88. data/lib/vayacondios/client/itemset.rb +0 -130
  89. data/lib/vayacondios/client/legacy_switch.rb +0 -43
  90. data/lib/vayacondios/client/notifier.rb +0 -123
  91. data/lib/vayacondios/client/zabbix_client.rb +0 -148
  92. data/lib/vayacondios/legacy_switch.rb +0 -43
  93. data/lib/vayacondios/server/errors/bad_request.rb +0 -6
  94. data/lib/vayacondios/server/errors/not_found.rb +0 -6
  95. data/lib/vayacondios/server/handlers/config_handler.rb +0 -32
  96. data/lib/vayacondios/server/handlers/itemset_handler.rb +0 -60
  97. data/lib/vayacondios/server/legacy_switch.rb +0 -43
  98. data/lib/vayacondios/server/model/config_document.rb +0 -89
  99. data/lib/vayacondios/server/model/document.rb +0 -25
  100. data/lib/vayacondios/server/model/event_document.rb +0 -94
  101. data/lib/vayacondios/server/model/itemset_document.rb +0 -126
  102. data/lib/vayacondios/server/rack/extract_methods.rb +0 -35
  103. data/lib/vayacondios/server/rack/jsonize.rb +0 -43
  104. data/lib/vayacondios/server/rack/params.rb +0 -50
  105. data/lib/vayacondios/server/rack/path.rb +0 -23
  106. data/lib/vayacondios/server/rack/path_validation.rb +0 -22
  107. data/lib/vayacondios/version.rb +0 -3
  108. data/lib/vayacondios-client.rb +0 -22
  109. data/scripts/hadoop_monitor/configurable.rb +0 -66
  110. data/scripts/hadoop_monitor/hadoop_attempt_scraper.rb +0 -45
  111. data/scripts/hadoop_monitor/hadoop_client.rb +0 -273
  112. data/scripts/hadoop_monitor/hadoop_monitor.rb +0 -101
  113. data/scripts/hadoop_monitor/hadoopable.rb +0 -65
  114. data/scripts/hadoop_monitor/machine_monitor.rb +0 -115
  115. data/scripts/s3_cataloger/buckets +0 -33
  116. data/scripts/s3_cataloger/foreach_bucket +0 -88
  117. data/scripts/s3_cataloger/parse_ls.py +0 -391
  118. data/spec/client/itemset_legacy_spec.rb +0 -55
  119. data/spec/client/itemset_spec.rb +0 -60
  120. data/spec/client/notifier_spec.rb +0 -120
  121. data/spec/server/config_spec.rb +0 -113
  122. data/spec/server/event_spec.rb +0 -103
  123. data/spec/server/itemset_legacy_spec.rb +0 -320
  124. data/spec/server/itemset_spec.rb +0 -317
  125. data/spec/server/rack/extract_methods_spec.rb +0 -60
  126. data/spec/server/rack/path_spec.rb +0 -36
  127. data/spec/server/rack/path_validation_spec.rb +0 -22
  128. data/spec/server/server_spec.rb +0 -20
  129. data/spec/support/mongo_cleaner.rb +0 -32
  130. data/src/main/java/ItemSetTest.java +0 -76
  131. data/src/main/java/com/infochimps/util/CurrentClass.java +0 -26
  132. data/src/main/java/com/infochimps/util/DebugUtil.java +0 -38
  133. data/src/main/java/com/infochimps/util/HttpHelper.java +0 -181
  134. data/src/main/java/com/infochimps/vayacondios/ItemSets.java +0 -373
  135. data/src/main/java/com/infochimps/vayacondios/LinkToVCD.java +0 -18
  136. data/src/main/java/com/infochimps/vayacondios/MemoryVCDShim.java +0 -84
  137. data/src/main/java/com/infochimps/vayacondios/Organization.java +0 -62
  138. data/src/main/java/com/infochimps/vayacondios/PathBuilder.java +0 -13
  139. data/src/main/java/com/infochimps/vayacondios/StandardVCDLink.java +0 -218
  140. data/src/main/java/com/infochimps/vayacondios/VCDIntegrationTest.java +0 -108
  141. data/src/test/java/com/infochimps/vayacondios/TestVayacondiosInMemory.java +0 -78
  142. data/vayacondios-client.gemspec +0 -25
--- data/lib/vayacondios/server/rack/path_validation.rb
@@ -1,22 +0,0 @@
- class Vayacondios
-   module Rack
-     class PathValidation
-       include Goliath::Rack::AsyncMiddleware
-
-       def initialize(app, opts = {})
-         @app = app ; @opts = opts
-       end
-
-       def call(env)
-         return [400, {}, MultiJson.dump({ error: "Bad Request. Format path is <host>/v1/<org>/event/<topic>" })] unless valid_paths? env[:vayacondios_path]
-         @app.call(env)
-       end
-
-       def valid_paths?(path)
-         # use @opts for validation later
-         path.nil? ? false : true
-       end
-
-     end
-   end
- end
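
For context: the removed PathValidation class follows Goliath's async-middleware convention, where a middleware is mounted with `use` inside a `Goliath::API` subclass and can short-circuit a request by returning a Rack triplet instead of calling through to the app. A minimal sketch of how such a middleware would be mounted (the `ExampleShim` app below is illustrative, not part of this diff):

require 'goliath'
require 'multi_json'

class ExampleShim < Goliath::API
  # Returns 400 before the handler runs if no vayacondios path was parsed.
  use Vayacondios::Rack::PathValidation

  def response(env)
    [200, {}, MultiJson.dump(ok: true)]
  end
end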
--- data/lib/vayacondios/version.rb
@@ -1,3 +0,0 @@
- class Vayacondios
-   VERSION = '0.2.11'
- end
--- data/lib/vayacondios-client.rb
@@ -1,22 +0,0 @@
- require 'configliere'
- require 'multi_json'
- require 'net/http'
-
- require 'gorillib/builder'
- require 'gorillib/configurable'
- require 'gorillib/enumerable/sum'
- require 'gorillib/exception/raisers'
- require 'gorillib/hash/deep_compact'
- require 'gorillib/hash/deep_merge'
- require 'gorillib/hash/keys'
- require 'gorillib/logger/log'
- require 'gorillib/metaprogramming/class_attribute'
- require 'gorillib/object/blank'
- require 'gorillib/string/constantize'
- require 'gorillib/string/inflections'
-
- require 'vayacondios/client/http_client'
- require 'vayacondios/client/cube_client'
- require 'vayacondios/client/zabbix_client'
- require 'vayacondios/client/notifier'
- require 'vayacondios/client/configliere'
--- data/scripts/hadoop_monitor/configurable.rb
@@ -1,66 +0,0 @@
- require 'configliere'
- require 'logger'
-
- class Vayacondios
-
-   module Configurable
-
-     #
-     # Declare a name CONST_NAME = :const_name
-     #
-     def self.declare_name symbol
-       const_set symbol.to_s.upcase.to_sym, symbol
-     end
-
-     declare_name :cluster_busy
-     declare_name :cluster_quiet
-     declare_name :event
-     declare_name :time
-
-     attr_reader :logger
-
-     def settings
-       init_settings
-       return @settings
-     end
-
-     def init_settings
-       return if defined? @settings
-
-       @settings = Configliere::Param.new
-       @settings.use :env_var, :config_file, :commandline
-
-       @settings.define(:config_file,
-                        description: "Config file location")
-       @settings.define(:sleep_seconds,
-                        default: 5,
-                        description: "Time to sleep in main loops")
-       @settings.define(:log_level,
-                        default: "info",
-                        description: "Log level. See standard Logger class")
-       @settings.define(:mongo_db,
-                        default: 'job_info',
-                        description: "Mongo database to dump hadoop job information into")
-       @settings.define(:mongo_ip,
-                        default: nil,
-                        description: "IP address of Hadoop monitor node")
-       @settings.define(:mongo_collection_size,
-                        default: 10 * (1 << 20),
-                        description: ("Size (in bytes) of Mongo job events collection"))
-
-       @settings.resolve!
-
-       if @settings.config_file
-         @settings.read(@settings.config_file)
-         @settings.resolve!
-       end
-
-       @logger = Logger.new(STDERR)
-       @logger.level = Logger.const_get(@settings.log_level.upcase.to_sym)
-
-       @logger.info "Settings: #{@settings}"
-
-       @settings
-     end
-   end
- end
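
The pattern above is standard Configliere layering: declared defaults, an optional config file, environment variables, and command-line flags, merged when `resolve!` runs. A standalone sketch of the same calls (not the monitor's actual settings object):

require 'configliere'

settings = Configliere::Param.new
settings.use :env_var, :commandline
settings.define(:sleep_seconds, default: 5, description: "Time to sleep in main loops")
settings.resolve!

puts settings.sleep_seconds   # 5 unless overridden, e.g. with --sleep_seconds=10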
--- data/scripts/hadoop_monitor/hadoop_attempt_scraper.rb
@@ -1,45 +0,0 @@
- require 'open-uri'
- require 'nibbler'
- require 'socket'
-
- class HadoopAttemptScraper < Nibbler
-   attr_accessor :task_id
-
-   def self.scrape_task(task_id)
-     task_id = task_id.to_s
-
-     url = "http://#{Socket.gethostname}:50030/taskdetails.jsp?tipid=#{task_id}"
-     scrape = parse(open(url))
-     scrape.task_id = task_id
-
-     scrape
-   end
-
-   elements 'table.jobtasks tbody > tr' => :attempts do
-     element 'td:nth-child(1)' => 'attempt_id'
-     element 'td:nth-child(2) a/@href' => 'machine'
-     element 'td:nth-child(3)' => 'status'
-     element 'td:nth-child(4)' => 'progress'
-     element 'td:nth-child(5)' => 'start_time'
-     element 'td:nth-child(6)' => 'finish_time'
-     element 'td:nth-child(7)' => 'errors'
-   end
-
-   def to_attempts
-     attempts.map do |attempt|
-       start_time = Time.parse(attempt.start_time) rescue nil
-       finish_time = attempt.finish_time.length > 0 ? Time.parse(attempt.finish_time) : nil
-       {
-         _id: attempt.attempt_id.to_s,
-         task_id: task_id,
-         host: attempt.machine.to_s.gsub(/^http:\/\//, '').gsub(/:[0-9]+$/, ''),
-         status: attempt.status,
-         progress: attempt.progress.to_f / 100.0,
-         start_time: start_time,
-         finish_time: finish_time,
-         duration: start_time ? (finish_time || Time.now) - start_time : nil,
-         errors: attempt.errors
-       }
-     end
-   end
- end
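
In use, the scraper was pointed at the JobTracker web UI (port 50030 on the local host) and flattened each HTML table row into an attempt hash. A hypothetical invocation, with a made-up task id:

scrape = HadoopAttemptScraper.scrape_task('task_201301010000_0001_m_000000')
scrape.to_attempts.each do |attempt|
  pct = (attempt[:progress] * 100).round
  puts "#{attempt[:_id]} on #{attempt[:host]}: #{attempt[:status]} (#{pct}%)"
end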
--- data/scripts/hadoop_monitor/hadoop_client.rb
@@ -1,273 +0,0 @@
- require_relative 'configurable'
- require_relative 'hadoopable'
- require_relative 'hadoop_attempt_scraper'
-
- require 'json'
- require 'optparse'
- require 'ostruct'
- require 'logger'
- require 'pp'
- require 'gorillib/string/inflections'
- require 'swineherd-fs'
-
- class Vayacondios
-
-   class HadoopClient
-
-     include Configurable
-     include Hadoopable
-
-     RUNNING = JobStatus::RUNNING
-
-     def initialize
-       init_settings
-       logger.info "Connecting to job tracker."
-       @job_client = JobClient.new JobConf.new(get_hadoop_conf)
-     end
-
-     #
-     # (Equality doesn't work for jobs, so - will not work as intended
-     # on arrays of jobs.)
-     #
-     def subtract jobs_array1, jobs_array2
-       jobs_array1.reject{|j| jobs_array2.map(&:job_id).map(&:to_s).index j.job_id.to_s}
-     end
-
-     #
-     # Returns the jobs with the specified state. States are specified
-     # by constants in this class.
-     #
-     def jobs_with_state state
-       jobs_by_state[state] || []
-     end
-
-     #
-     # Returns the properties of the specified job as a hash.
-     #
-     def job_properties job
-       host_port = job.get_tracking_url[/^(http:\/\/)?[^\/]*/]
-       job_id = job.get_id.to_s
-       conf_uri = "#{host_port}/logs/#{job_id}_conf.xml"
-
-       parse_properties(open conf_uri)
-     end
-
-     #
-     # Returns the stats for the current job as a hash.
-     #
-     def job_stats job, finish_time
-       parse_job job.get_id, finish_time
-     end
-
-     private
-
-     #
-     # Returns a hash JobStatus::<SOME_STATE> => <array of jobs>
-     #
-     def jobs_by_state
-       job_statuses_by_state = @job_client.get_all_jobs.group_by(&:get_run_state)
-       Hash[job_statuses_by_state.map{|state, job_statuses| [state, jobs_from_statuses(job_statuses)]}]
-     end
-
-     #
-     # Some hadoop stuff returns JobStatus objects. This converts them
-     # to RunningJob objects.
-     #
-     def jobs_from_statuses job_statuses
-       job_statuses.map{|job_status| @job_client.get_job job_status.get_job_id}
-     end
-
-     #
-     # Takes an org.apache.hadoop.mapred.RunningJob and returns a hash
-     # object that represents it.
-     #
-     def parse_job job_id, finish_time
-       job = @job_client.get_job job_id
-       job_status = @job_client.get_all_jobs.select{|j| j.get_job_id.to_s == job_id.to_s}.first
-       finished_status = [:FAILED, :KILLED, :COMPLETE]
-       failed_status = [:FAILED]
-
-
-       # not sure what is what. I'm guessing
-       # JobStatus.getStartTime corresponds to the
-       # launch time in the logs
-
-       start_time = Time.at(job_status.get_start_time / 1000)
-       reduce_progress = job.reduce_progress
-       map_progress = job.map_progress
-       run_duration = (finish_time || Time.now) - start_time
-
-       map_eta = map_progress && map_progress > 0.0 ? (start_time + (run_duration / map_progress)) : nil
-       reduce_eta = reduce_progress && reduce_progress > 0.0 ? (start_time + (run_duration / reduce_progress)) : nil
-
-       job_data = {
-
-         _id: job_id.to_s,
-         name: job.get_job_name.to_s,
-
-         start_time: start_time,
-         finish_time: finish_time,
-
-         duration: run_duration,
-
-         map_eta: map_eta,
-         reduce_eta: reduce_eta,
-         eta: reduce_eta,
-
-         status: case job_status.get_run_state
-                 when JobStatus::FAILED then :FAILED
-                 when JobStatus::KILLED then :KILLED
-                 when JobStatus::PREP then :PREP
-                 when JobStatus::RUNNING then :RUNNING
-                 when JobStatus::SUCCEEDED then :SUCCEEDED
-                 end,
-
-         finished_maps: num_tasks(job_id, :map, finished_status),
-         finished_reduces: num_tasks(job_id, :reduce, finished_status),
-         failed_maps: num_tasks(job_id, :map, failed_status),
-         failed_reduces: num_tasks(job_id, :reduce, failed_status),
-
-         counters: parse_counters(job.get_counters)
-       }
-
-       job_event = {
-         t: Time.now,
-         d: {
-           job_id: job.job_id,
-           cleanup_progress: job.cleanup_progress,
-           map_progress: job.map_progress,
-           reduce_progress: job.reduce_progress,
-           setup_progress: job.setup_progress,
-         }
-       }
-
-       setup_task_data = @job_client.get_setup_task_reports job_id
-       map_task_data = @job_client.get_map_task_reports job_id
-       reduce_task_data = @job_client.get_reduce_task_reports job_id
-       cleanup_task_data = @job_client.get_cleanup_task_reports job_id
-
-       setup_reports = setup_task_data.map{|task| parse_task task, "SETUP", job_id }
-       setup_event_reports = setup_task_data.map{|task| parse_task_progress task, "SETUP" }
-
-       map_reports = map_task_data.map{|task| parse_task task, "MAP", job_id }
-       map_event_reports = map_task_data.map{|task| parse_task_progress task, "MAP" }
-
-       reduce_reports = reduce_task_data.map{|task| parse_task task, "REDUCE", job_id }
-       reduce_event_reports = reduce_task_data.map{|task| parse_task_progress task, "REDUCE" }
-
-       cleanup_reports = cleanup_task_data.map{|task| parse_task task, "CLEANUP", job_id }
-       cleanup_event_reports = cleanup_task_data.map{|task| parse_task_progress task, "CLEANUP" }
-
-       tasks = setup_reports + map_reports + reduce_reports + cleanup_reports
-       task_events = setup_event_reports + map_event_reports + reduce_event_reports + cleanup_event_reports
-
-       attempt_reports = tasks.map{|task| HadoopAttemptScraper.scrape_task(task[:_id]).to_attempts }.flatten
-
-       {
-         job: job_data,
-         job_event: job_event,
-         tasks: tasks,
-         task_events: task_events,
-         attempts: attempt_reports
-       }
-     end
-
-     #
-     # Return a hash containing a name => value hash representing the
-     # config for a hadoop job.
-     #
-     def parse_properties conf
-       properties = {}
-       conf.read.scan /[^\n]*\n/ do |line,|
-         m = /<name>([^<]+)<\/name><value>([^<]+)<\/value>/.match line
-         if m and m[1] !~ /fs\.s3n?\.awsSecretAccessKey/ then
-           properties[parse_key m[1]] = parse_atom m[2]
-         end
-       end
-       properties
-     end
-
-     #
-     # Takes an org.apache.hadoop.mapred.TaskReport and returns a Hash
-     # object that represents it.
-     #
-     def parse_task task_report, task_type, parent_job_id
-       start_time = task_report.get_start_time > 0 ? Time.at(task_report.get_start_time / 1000) : nil
-       finish_time = task_report.get_finish_time > 0 ? Time.at(task_report.get_finish_time / 1000) : nil
-
-       {
-         _id: task_report.get_task_id.to_s,
-         job_id: parent_job_id.to_s,
-         type: task_type,
-         status: task_report.get_current_status.to_s,
-         start_time: start_time,
-         finish_time: finish_time,
-         duration: start_time ? (finish_time || Time.now) - start_time : nil,
-         counters: parse_counters(task_report.get_counters),
-         diagnostics: task_report.get_diagnostics.map(&:to_s),
-         successful_attempt_id: task_report.get_successful_task_attempt.to_s
-       }
-     end
-
-     def parse_task_progress task_report, task_type
-       {
-         t: Time.now,
-         d: {
-           task_id: task_report.get_task_id.to_s,
-           progress: task_report.get_progress,
-           running_attempt_ids: task_report.get_running_task_attempts.map(&:to_s)
-         }
-       }
-     end
-
-     #
-     # Takes a class of type org.apache.hadoop.mapred.Counters and
-     # returns a Hash object that represents this counter.
-     #
-     def parse_counters counters
-       Hash[counters.map do |group|
-         [parse_key(group.get_name), Hash[group.map do |counter|
-           [parse_key(counter.get_name), counter.get_counter]
-         end]]
-       end]
-     end
-
-     #
-     # Parse a key in a log entry. Log entries consist of a type, which I
-     # consider a key, and a list of key=value pairs.
-     #
-     def parse_key key
-       return (parse_atom key).underscore.gsub ".", "_"
-     end
-
-     #
-     # Parse a value in a Hadoop log.
-     #
-     def parse_atom a
-       if /[0-9][ \r\t\n]*\/[ \r\t\n]*[0-9]+/.match a
-         # "0/6" -> [0,6]
-         return a.split("/").collect{|s| s.to_i}
-       elsif /^[0-9,]*$/.match a
-         # "224" -> 224
-         return a.gsub(',', '').to_i
-       else
-         # \. -> .
-         return a.gsub(/([^\\])\\(.)/, '\1\2')
-       end
-     end
-
-     #
-     # Returns the number of tasks of the specified TIPStatus from the
-     # specified job_client of the specified type (map or reduce)
-     #
-     def num_tasks job_id, map_or_reduce, statuses
-       method_name = "get_#{map_or_reduce}_task_reports".to_sym
-       @job_client.send(method_name, job_id).select do |report|
-         tip_statuses = statuses.map do |status|
-           TIPStatus.const_get status
-         end
-         tip_statuses.index report.get_current_status
-       end.size
-     end
-   end
- end
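
The three branches of `parse_atom` above are easiest to read against concrete inputs. This is a copy of its logic lifted out as a free function purely for illustration:

# Same branches as HadoopClient#parse_atom, extracted for illustration.
def parse_atom(a)
  if %r{[0-9][ \r\t\n]*/[ \r\t\n]*[0-9]+}.match(a)
    a.split('/').collect(&:to_i)     # "0/6"   => [0, 6]   (completed/total pairs)
  elsif /^[0-9,]*$/.match(a)
    a.gsub(',', '').to_i             # "1,224" => 1224     (comma-grouped counters)
  else
    a.gsub(/([^\\])\\(.)/, '\1\2')   # 'a\.b'  => "a.b"    (strip log escapes)
  end
end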
--- data/scripts/hadoop_monitor/hadoop_monitor.rb
@@ -1,101 +0,0 @@
- #!/usr/bin/env jruby19
-
- require_relative 'hadoop_client'
- require_relative 'configurable'
- require 'java'
- require 'mongo'
- require 'scanf'
- require 'gorillib/hash/slice'
- require 'thread'
- require 'open-uri'
- require 'json'
-
- class Vayacondios
-
-   class HadoopMonitor
-     def initialize
-       init_settings
-
-       @hadoop = HadoopClient.new
-
-       @monitored_jobs = []
-
-       logger.debug "Creating mongo collections."
-       @conn = Mongo::Connection.new settings.mongo_ip
-       @db = @conn[settings.mongo_db]
-
-       capped_collection_opts = {
-         :capped => true,
-         :size => settings.mongo_collection_size
-       }
-
-       @collections = {
-         jobs: @db.create_collection('jobs'),
-         tasks: @db.create_collection('job_tasks'),
-         attempts: @db.create_collection('job_task_attempts'),
-
-         job_events: @db.create_collection('job_events', capped_collection_opts),
-         task_events: @db.create_collection('job_task_events', capped_collection_opts),
-       }
-     end
-
-     def run
-       loop do
-
-         logger.debug "In main event loop."
-
-         running_jobs = @hadoop.jobs_with_state HadoopClient::RUNNING
-         started_jobs = @hadoop.subtract(running_jobs, @monitored_jobs)
-         finished_jobs = @hadoop.subtract(@monitored_jobs, running_jobs)
-
-         finished_jobs.each do |job|
-           logger.debug "#{job.get_id.to_s} is complete."
-           update_job_stats job, Time.now
-         end
-
-         running_jobs.each{|job| update_job_stats job, nil, @hadoop.subtract([job], started_jobs).empty? }
-
-         @monitored_jobs = running_jobs
-
-         sleep settings.sleep_seconds
-
-       end
-     end
-
-     private
-
-     include Configurable
-
-     def update_job_stats job, finish_time = nil, include_properties = false
-       stats = @hadoop.job_stats(job, finish_time)
-
-       if include_properties
-         stats[:job][:properties] = @hadoop.job_properties job
-       end
-
-       logger.debug "upserting job #{JSON.generate stats[:job]}"
-       @collections[:jobs].update({_id: stats[:job][:_id]}, stats[:job], upsert: true)
-
-       logger.debug "upserting job_event #{JSON.generate stats[:job_event]}"
-       @collections[:job_events].insert(stats[:job_event])
-
-       logger.debug "upserting tasks #{JSON.generate stats[:tasks]}"
-       stats[:tasks].each do |task|
-         @collections[:tasks].update({_id: task[:_id]}, task, upsert: true)
-       end
-
-       logger.debug "upserting task_events #{JSON.generate stats[:task_events]}"
-       stats[:task_events].each do |task_event|
-         @collections[:task_events].insert(task_event)
-       end
-
-       logger.debug "upserting attempts #{JSON.generate stats[:attempts]}"
-       stats[:attempts].each do |attempt|
-         @collections[:attempts].update({_id: attempt[:_id]}, attempt, upsert: true)
-       end
-     end
-
-   end
- end
-
- Vayacondios::HadoopMonitor.new.run
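
The monitor persists two kinds of records: current-state documents upserted by `_id` (jobs, tasks, attempts) and append-only progress samples in capped collections (job_events, job_task_events), which discard their oldest entries once full. A sketch of that split against the same mongo 1.x driver API the script uses (the job id is made up):

require 'mongo'   # 1.x driver, matching Mongo::Connection above

db = Mongo::Connection.new('localhost')['job_info']
jobs       = db.create_collection('jobs')
job_events = db.create_collection('job_events', :capped => true, :size => 10 * (1 << 20))

job_id = 'job_201301010000_0001'

# One mutable document per job: each poll overwrites it in place.
jobs.update({ _id: job_id }, { _id: job_id, status: :RUNNING }, upsert: true)

# One immutable sample per poll: old samples age out of the capped collection.
job_events.insert(t: Time.now, d: { job_id: job_id, map_progress: 0.4 })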
--- data/scripts/hadoop_monitor/hadoopable.rb
@@ -1,65 +0,0 @@
- require 'stringio'
-
- class Vayacondios
-
-   module Hadoopable
-
-     include Configurable
-
-     #--------------------------------------------------------------------------------
-     # Initialize jruby and tell it about hadoop.
-     #--------------------------------------------------------------------------------
-
-     begin
-       require 'java'
-     rescue LoadError => e
-       raise "\nJava not found. Are you sure you're running with JRuby?\n#{e.message}"
-     end
-
-     hadoop_home = ENV['HADOOP_HOME'] || '/usr/lib/hadoop'
-
-     raise "\nHadoop installation not found. Try setting $HADOOP_HOME\n" unless (hadoop_home and (File.exist? hadoop_home))
-
-     $CLASSPATH << File.join(File.join(hadoop_home, 'conf') || ENV['HADOOP_CONF_DIR'],
-                             '') # add trailing slash
-
-     Dir["#{hadoop_home}/{hadoop*.jar,lib/*.jar}"].each{|jar| require jar}
-
-     include_class org.apache.hadoop.mapred.JobConf
-     include_class org.apache.hadoop.mapred.JobClient
-     include_class org.apache.hadoop.mapred.JobStatus
-     include_class org.apache.hadoop.mapred.TIPStatus
-     include_class org.apache.hadoop.conf.Configuration
-     #--------------------------------------------------------------------------------
-
-     def get_hadoop_conf
-       logger.debug "Getting hadoop configuration"
-
-       stderr, $stderr = $stderr, StringIO.new
-
-       conf = Configuration.new
-
-       # per-site defaults
-       %w[capacity-scheduler.xml core-site.xml hadoop-policy.xml hadoop-site.xml hdfs-site.xml mapred-site.xml].each do |conf_file|
-         conf.addResource conf_file
-       end
-
-       conf.reload_configuration
-
-       # per-user overrides
-       if Swineherd.config[:aws]
-         conf.set("fs.s3.awsAccessKeyId",Swineherd.config[:aws][:access_key])
-         conf.set("fs.s3.awsSecretAccessKey",Swineherd.config[:aws][:secret_key])
-
-         conf.set("fs.s3n.awsAccessKeyId",Swineherd.config[:aws][:access_key])
-         conf.set("fs.s3n.awsSecretAccessKey",Swineherd.config[:aws][:secret_key])
-       end
-
-       return conf
-     ensure
-       stderr_lines = $stderr.string.split("\n")
-       $stderr = stderr
-       stderr_lines.each{|line| logger.debug line}
-     end
-   end
- end
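
The module body above runs at load time under JRuby: it pushes the Hadoop conf directory and jars onto the classpath, then imports the JobTracker client classes (`include_class` is the older JRuby spelling of `java_import`). A minimal reproduction of that bootstrap, assuming a Hadoop 1.x layout under $HADOOP_HOME:

# JRuby only: load Hadoop's jars, then import the JobTracker client classes.
require 'java'

hadoop_home = ENV.fetch('HADOOP_HOME', '/usr/lib/hadoop')
$CLASSPATH << File.join(hadoop_home, 'conf', '')   # trailing slash adds it as a directory
Dir["#{hadoop_home}/{hadoop*.jar,lib/*.jar}"].each { |jar| require jar }

java_import org.apache.hadoop.mapred.JobClient
java_import org.apache.hadoop.mapred.JobConf
java_import org.apache.hadoop.conf.Configuration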