flapjack 1.6.0 → 2.0.0b1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (301) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -6
  3. data/.gitmodules +1 -1
  4. data/.rspec +1 -1
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +12 -13
  7. data/CHANGELOG.md +2 -9
  8. data/CONTRIBUTING.md +7 -2
  9. data/Gemfile +4 -13
  10. data/LICENCE +1 -0
  11. data/README.md +8 -2
  12. data/Rakefile +2 -2
  13. data/bin/flapjack +3 -12
  14. data/build.sh +4 -2
  15. data/etc/flapjack_config.toml.example +273 -0
  16. data/features/ack_after_sched_maint.feature +18 -21
  17. data/features/cli.feature +11 -71
  18. data/features/cli_flapjack-feed-events.feature +14 -15
  19. data/features/cli_flapjack-nagios-receiver.feature +12 -41
  20. data/features/cli_flapper.feature +12 -41
  21. data/features/cli_purge.feature +5 -6
  22. data/features/cli_receive-events.feature +6 -7
  23. data/features/cli_simulate-failed-check.feature +5 -6
  24. data/features/events.feature +206 -181
  25. data/features/events_check_names.feature +4 -7
  26. data/features/notification_rules.feature +144 -223
  27. data/features/notifications.feature +65 -57
  28. data/features/rollup.feature +45 -47
  29. data/features/steps/cli_steps.rb +4 -5
  30. data/features/steps/events_steps.rb +163 -373
  31. data/features/steps/notifications_steps.rb +408 -264
  32. data/features/steps/packaging-lintian_steps.rb +0 -4
  33. data/features/steps/time_travel_steps.rb +0 -26
  34. data/features/support/daemons.rb +6 -31
  35. data/features/support/env.rb +65 -74
  36. data/flapjack.gemspec +22 -24
  37. data/lib/flapjack.rb +14 -7
  38. data/lib/flapjack/cli/flapper.rb +74 -173
  39. data/lib/flapjack/cli/maintenance.rb +278 -109
  40. data/lib/flapjack/cli/migrate.rb +950 -0
  41. data/lib/flapjack/cli/purge.rb +19 -22
  42. data/lib/flapjack/cli/receiver.rb +150 -326
  43. data/lib/flapjack/cli/server.rb +8 -235
  44. data/lib/flapjack/cli/simulate.rb +42 -57
  45. data/lib/flapjack/configuration.rb +51 -37
  46. data/lib/flapjack/coordinator.rb +138 -129
  47. data/lib/flapjack/data/acknowledgement.rb +177 -0
  48. data/lib/flapjack/data/alert.rb +97 -158
  49. data/lib/flapjack/data/check.rb +611 -0
  50. data/lib/flapjack/data/condition.rb +70 -0
  51. data/lib/flapjack/data/contact.rb +226 -456
  52. data/lib/flapjack/data/event.rb +96 -184
  53. data/lib/flapjack/data/extensions/associations.rb +59 -0
  54. data/lib/flapjack/data/extensions/short_name.rb +25 -0
  55. data/lib/flapjack/data/medium.rb +428 -0
  56. data/lib/flapjack/data/metrics.rb +194 -0
  57. data/lib/flapjack/data/notification.rb +22 -281
  58. data/lib/flapjack/data/rule.rb +473 -0
  59. data/lib/flapjack/data/scheduled_maintenance.rb +244 -0
  60. data/lib/flapjack/data/state.rb +221 -0
  61. data/lib/flapjack/data/statistic.rb +112 -0
  62. data/lib/flapjack/data/tag.rb +277 -0
  63. data/lib/flapjack/data/test_notification.rb +182 -0
  64. data/lib/flapjack/data/unscheduled_maintenance.rb +159 -0
  65. data/lib/flapjack/data/validators/id_validator.rb +20 -0
  66. data/lib/flapjack/exceptions.rb +6 -0
  67. data/lib/flapjack/filters/acknowledgement.rb +23 -16
  68. data/lib/flapjack/filters/base.rb +0 -5
  69. data/lib/flapjack/filters/delays.rb +53 -43
  70. data/lib/flapjack/filters/ok.rb +23 -14
  71. data/lib/flapjack/filters/scheduled_maintenance.rb +3 -3
  72. data/lib/flapjack/filters/unscheduled_maintenance.rb +12 -3
  73. data/lib/flapjack/gateways/aws_sns.rb +65 -49
  74. data/lib/flapjack/gateways/aws_sns/alert.text.erb +2 -2
  75. data/lib/flapjack/gateways/aws_sns/alert_subject.text.erb +2 -2
  76. data/lib/flapjack/gateways/aws_sns/rollup_subject.text.erb +1 -1
  77. data/lib/flapjack/gateways/email.rb +107 -90
  78. data/lib/flapjack/gateways/email/alert.html.erb +19 -18
  79. data/lib/flapjack/gateways/email/alert.text.erb +20 -14
  80. data/lib/flapjack/gateways/email/alert_subject.text.erb +2 -1
  81. data/lib/flapjack/gateways/email/rollup.html.erb +14 -13
  82. data/lib/flapjack/gateways/email/rollup.text.erb +13 -10
  83. data/lib/flapjack/gateways/jabber.rb +679 -671
  84. data/lib/flapjack/gateways/jabber/alert.text.erb +9 -6
  85. data/lib/flapjack/gateways/jsonapi.rb +164 -350
  86. data/lib/flapjack/gateways/jsonapi/data/join_descriptor.rb +44 -0
  87. data/lib/flapjack/gateways/jsonapi/data/method_descriptor.rb +21 -0
  88. data/lib/flapjack/gateways/jsonapi/helpers/headers.rb +63 -0
  89. data/lib/flapjack/gateways/jsonapi/helpers/miscellaneous.rb +136 -0
  90. data/lib/flapjack/gateways/jsonapi/helpers/resources.rb +227 -0
  91. data/lib/flapjack/gateways/jsonapi/helpers/serialiser.rb +313 -0
  92. data/lib/flapjack/gateways/jsonapi/helpers/swagger_docs.rb +322 -0
  93. data/lib/flapjack/gateways/jsonapi/methods/association_delete.rb +115 -0
  94. data/lib/flapjack/gateways/jsonapi/methods/association_get.rb +288 -0
  95. data/lib/flapjack/gateways/jsonapi/methods/association_patch.rb +178 -0
  96. data/lib/flapjack/gateways/jsonapi/methods/association_post.rb +116 -0
  97. data/lib/flapjack/gateways/jsonapi/methods/metrics.rb +71 -0
  98. data/lib/flapjack/gateways/jsonapi/methods/resource_delete.rb +119 -0
  99. data/lib/flapjack/gateways/jsonapi/methods/resource_get.rb +186 -0
  100. data/lib/flapjack/gateways/jsonapi/methods/resource_patch.rb +239 -0
  101. data/lib/flapjack/gateways/jsonapi/methods/resource_post.rb +197 -0
  102. data/lib/flapjack/gateways/jsonapi/middleware/array_param_fixer.rb +27 -0
  103. data/lib/flapjack/gateways/jsonapi/{rack → middleware}/json_params_parser.rb +7 -6
  104. data/lib/flapjack/gateways/jsonapi/middleware/request_timestamp.rb +18 -0
  105. data/lib/flapjack/gateways/oobetet.rb +222 -170
  106. data/lib/flapjack/gateways/pager_duty.rb +388 -0
  107. data/lib/flapjack/gateways/pager_duty/alert.text.erb +13 -0
  108. data/lib/flapjack/gateways/slack.rb +56 -48
  109. data/lib/flapjack/gateways/slack/alert.text.erb +1 -1
  110. data/lib/flapjack/gateways/slack/rollup.text.erb +1 -1
  111. data/lib/flapjack/gateways/sms_aspsms.rb +155 -0
  112. data/lib/flapjack/gateways/sms_aspsms/alert.text.erb +7 -0
  113. data/lib/flapjack/gateways/sms_aspsms/rollup.text.erb +2 -0
  114. data/lib/flapjack/gateways/sms_messagenet.rb +77 -57
  115. data/lib/flapjack/gateways/sms_messagenet/alert.text.erb +3 -2
  116. data/lib/flapjack/gateways/sms_nexmo.rb +53 -51
  117. data/lib/flapjack/gateways/sms_nexmo/alert.text.erb +2 -2
  118. data/lib/flapjack/gateways/sms_nexmo/rollup.text.erb +1 -1
  119. data/lib/flapjack/gateways/sms_twilio.rb +79 -62
  120. data/lib/flapjack/gateways/sms_twilio/alert.text.erb +3 -2
  121. data/lib/flapjack/gateways/web.rb +437 -345
  122. data/lib/flapjack/gateways/web/middleware/request_timestamp.rb +18 -0
  123. data/lib/flapjack/gateways/web/public/css/bootstrap.css +3793 -4340
  124. data/lib/flapjack/gateways/web/public/css/bootstrap.css.map +1 -0
  125. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.eot +0 -0
  126. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.svg +273 -214
  127. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.ttf +0 -0
  128. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.woff +0 -0
  129. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.woff2 +0 -0
  130. data/lib/flapjack/gateways/web/public/js/bootstrap.js +1637 -1607
  131. data/lib/flapjack/gateways/web/public/js/self_stats.js +1 -2
  132. data/lib/flapjack/gateways/web/views/_pagination.html.erb +19 -0
  133. data/lib/flapjack/gateways/web/views/check.html.erb +159 -121
  134. data/lib/flapjack/gateways/web/views/checks.html.erb +82 -41
  135. data/lib/flapjack/gateways/web/views/contact.html.erb +59 -71
  136. data/lib/flapjack/gateways/web/views/contacts.html.erb +32 -8
  137. data/lib/flapjack/gateways/web/views/index.html.erb +2 -2
  138. data/lib/flapjack/gateways/web/views/{layout.erb → layout.html.erb} +7 -23
  139. data/lib/flapjack/gateways/web/views/self_stats.html.erb +32 -33
  140. data/lib/flapjack/gateways/web/views/tag.html.erb +32 -0
  141. data/lib/flapjack/gateways/web/views/tags.html.erb +51 -0
  142. data/lib/flapjack/logger.rb +34 -3
  143. data/lib/flapjack/notifier.rb +180 -112
  144. data/lib/flapjack/patches.rb +8 -63
  145. data/lib/flapjack/pikelet.rb +185 -143
  146. data/lib/flapjack/processor.rb +323 -191
  147. data/lib/flapjack/record_queue.rb +33 -0
  148. data/lib/flapjack/redis_proxy.rb +66 -0
  149. data/lib/flapjack/utility.rb +21 -15
  150. data/lib/flapjack/version.rb +2 -1
  151. data/libexec/httpbroker.go +218 -14
  152. data/libexec/oneoff.go +13 -10
  153. data/spec/lib/flapjack/configuration_spec.rb +286 -0
  154. data/spec/lib/flapjack/coordinator_spec.rb +103 -157
  155. data/spec/lib/flapjack/data/check_spec.rb +175 -0
  156. data/spec/lib/flapjack/data/contact_spec.rb +26 -349
  157. data/spec/lib/flapjack/data/event_spec.rb +76 -291
  158. data/spec/lib/flapjack/data/medium_spec.rb +19 -0
  159. data/spec/lib/flapjack/data/rule_spec.rb +43 -0
  160. data/spec/lib/flapjack/data/scheduled_maintenance_spec.rb +976 -0
  161. data/spec/lib/flapjack/data/unscheduled_maintenance_spec.rb +34 -0
  162. data/spec/lib/flapjack/gateways/aws_sns_spec.rb +111 -60
  163. data/spec/lib/flapjack/gateways/email_spec.rb +194 -161
  164. data/spec/lib/flapjack/gateways/jabber_spec.rb +961 -162
  165. data/spec/lib/flapjack/gateways/jsonapi/methods/check_links_spec.rb +155 -0
  166. data/spec/lib/flapjack/gateways/jsonapi/methods/checks_spec.rb +426 -0
  167. data/spec/lib/flapjack/gateways/jsonapi/methods/contact_links_spec.rb +217 -0
  168. data/spec/lib/flapjack/gateways/jsonapi/methods/contacts_spec.rb +425 -0
  169. data/spec/lib/flapjack/gateways/jsonapi/methods/events_spec.rb +271 -0
  170. data/spec/lib/flapjack/gateways/jsonapi/methods/media_spec.rb +257 -0
  171. data/spec/lib/flapjack/gateways/jsonapi/methods/medium_links_spec.rb +163 -0
  172. data/spec/lib/flapjack/gateways/jsonapi/methods/metrics_spec.rb +8 -0
  173. data/spec/lib/flapjack/gateways/jsonapi/methods/rule_links_spec.rb +212 -0
  174. data/spec/lib/flapjack/gateways/jsonapi/methods/rules_spec.rb +289 -0
  175. data/spec/lib/flapjack/gateways/jsonapi/methods/scheduled_maintenance_links_spec.rb +49 -0
  176. data/spec/lib/flapjack/gateways/jsonapi/methods/scheduled_maintenances_spec.rb +242 -0
  177. data/spec/lib/flapjack/gateways/jsonapi/methods/tag_links_spec.rb +274 -0
  178. data/spec/lib/flapjack/gateways/jsonapi/methods/tags_spec.rb +302 -0
  179. data/spec/lib/flapjack/gateways/jsonapi/methods/unscheduled_maintenance_links_spec.rb +49 -0
  180. data/spec/lib/flapjack/gateways/jsonapi/methods/unscheduled_maintenances_spec.rb +339 -0
  181. data/spec/lib/flapjack/gateways/jsonapi_spec.rb +1 -1
  182. data/spec/lib/flapjack/gateways/oobetet_spec.rb +151 -79
  183. data/spec/lib/flapjack/gateways/pager_duty_spec.rb +353 -0
  184. data/spec/lib/flapjack/gateways/slack_spec.rb +53 -53
  185. data/spec/lib/flapjack/gateways/sms_aspsms_spec.rb +106 -0
  186. data/spec/lib/flapjack/gateways/sms_messagenet_spec.rb +111 -54
  187. data/spec/lib/flapjack/gateways/sms_nexmo_spec.rb +50 -51
  188. data/spec/lib/flapjack/gateways/sms_twilio_spec.rb +108 -48
  189. data/spec/lib/flapjack/gateways/web_spec.rb +144 -216
  190. data/spec/lib/flapjack/notifier_spec.rb +132 -1
  191. data/spec/lib/flapjack/pikelet_spec.rb +111 -50
  192. data/spec/lib/flapjack/processor_spec.rb +210 -40
  193. data/spec/lib/flapjack/redis_proxy_spec.rb +45 -0
  194. data/spec/lib/flapjack/utility_spec.rb +11 -15
  195. data/spec/service_consumers/fixture_data.rb +547 -0
  196. data/spec/service_consumers/pact_helper.rb +21 -32
  197. data/spec/service_consumers/pacts/flapjack-diner_v2.0.json +4652 -0
  198. data/spec/service_consumers/provider_states_for_flapjack-diner.rb +279 -322
  199. data/spec/service_consumers/provider_support.rb +8 -0
  200. data/spec/spec_helper.rb +34 -44
  201. data/spec/support/erb_view_helper.rb +1 -1
  202. data/spec/support/factories.rb +58 -0
  203. data/spec/support/jsonapi_helper.rb +15 -26
  204. data/spec/support/mock_logger.rb +43 -0
  205. data/spec/support/xmpp_comparable.rb +24 -0
  206. data/src/flapjack/transport_test.go +30 -1
  207. data/tasks/dump_keys.rake +82 -0
  208. data/tasks/events.rake +7 -7
  209. data/tasks/support/flapjack_config_benchmark.toml +28 -0
  210. data/tasks/support/flapjack_config_benchmark.yaml +0 -2
  211. metadata +175 -222
  212. data/Guardfile +0 -14
  213. data/etc/flapjack_config.yaml.example +0 -477
  214. data/features/cli_flapjack-populator.feature +0 -90
  215. data/features/support/silent_system.rb +0 -4
  216. data/lib/flapjack/cli/import.rb +0 -108
  217. data/lib/flapjack/data/entity.rb +0 -652
  218. data/lib/flapjack/data/entity_check.rb +0 -1044
  219. data/lib/flapjack/data/message.rb +0 -56
  220. data/lib/flapjack/data/migration.rb +0 -234
  221. data/lib/flapjack/data/notification_rule.rb +0 -425
  222. data/lib/flapjack/data/semaphore.rb +0 -44
  223. data/lib/flapjack/data/tagged.rb +0 -48
  224. data/lib/flapjack/gateways/jsonapi/check_methods.rb +0 -206
  225. data/lib/flapjack/gateways/jsonapi/check_presenter.rb +0 -221
  226. data/lib/flapjack/gateways/jsonapi/contact_methods.rb +0 -186
  227. data/lib/flapjack/gateways/jsonapi/entity_methods.rb +0 -223
  228. data/lib/flapjack/gateways/jsonapi/medium_methods.rb +0 -185
  229. data/lib/flapjack/gateways/jsonapi/metrics_methods.rb +0 -132
  230. data/lib/flapjack/gateways/jsonapi/notification_rule_methods.rb +0 -141
  231. data/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods.rb +0 -139
  232. data/lib/flapjack/gateways/jsonapi/report_methods.rb +0 -146
  233. data/lib/flapjack/gateways/pagerduty.rb +0 -318
  234. data/lib/flapjack/gateways/pagerduty/alert.text.erb +0 -10
  235. data/lib/flapjack/gateways/web/public/css/select2-bootstrap.css +0 -87
  236. data/lib/flapjack/gateways/web/public/css/select2.css +0 -615
  237. data/lib/flapjack/gateways/web/public/css/tablesort.css +0 -67
  238. data/lib/flapjack/gateways/web/public/img/select2-spinner.gif +0 -0
  239. data/lib/flapjack/gateways/web/public/img/select2.png +0 -0
  240. data/lib/flapjack/gateways/web/public/img/select2x2.png +0 -0
  241. data/lib/flapjack/gateways/web/public/js/backbone.js +0 -1581
  242. data/lib/flapjack/gateways/web/public/js/backbone.jsonapi.js +0 -322
  243. data/lib/flapjack/gateways/web/public/js/flapjack.js +0 -82
  244. data/lib/flapjack/gateways/web/public/js/jquery.tablesorter.js +0 -1640
  245. data/lib/flapjack/gateways/web/public/js/jquery.tablesorter.widgets.js +0 -1390
  246. data/lib/flapjack/gateways/web/public/js/modules/contact.js +0 -520
  247. data/lib/flapjack/gateways/web/public/js/modules/entity.js +0 -28
  248. data/lib/flapjack/gateways/web/public/js/modules/medium.js +0 -40
  249. data/lib/flapjack/gateways/web/public/js/select2.js +0 -3397
  250. data/lib/flapjack/gateways/web/public/js/tablesort.js +0 -44
  251. data/lib/flapjack/gateways/web/public/js/underscore.js +0 -1276
  252. data/lib/flapjack/gateways/web/views/edit_contacts.html.erb +0 -173
  253. data/lib/flapjack/gateways/web/views/entities.html.erb +0 -30
  254. data/lib/flapjack/gateways/web/views/entity.html.erb +0 -51
  255. data/lib/flapjack/rack_logger.rb +0 -47
  256. data/lib/flapjack/redis_pool.rb +0 -42
  257. data/spec/lib/flapjack/data/entity_check_spec.rb +0 -1418
  258. data/spec/lib/flapjack/data/entity_spec.rb +0 -872
  259. data/spec/lib/flapjack/data/message_spec.rb +0 -30
  260. data/spec/lib/flapjack/data/migration_spec.rb +0 -104
  261. data/spec/lib/flapjack/data/notification_rule_spec.rb +0 -232
  262. data/spec/lib/flapjack/data/notification_spec.rb +0 -53
  263. data/spec/lib/flapjack/data/semaphore_spec.rb +0 -24
  264. data/spec/lib/flapjack/filters/acknowledgement_spec.rb +0 -6
  265. data/spec/lib/flapjack/filters/delays_spec.rb +0 -6
  266. data/spec/lib/flapjack/filters/ok_spec.rb +0 -6
  267. data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +0 -6
  268. data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +0 -6
  269. data/spec/lib/flapjack/gateways/jsonapi/check_methods_spec.rb +0 -315
  270. data/spec/lib/flapjack/gateways/jsonapi/check_presenter_spec.rb +0 -223
  271. data/spec/lib/flapjack/gateways/jsonapi/contact_methods_spec.rb +0 -131
  272. data/spec/lib/flapjack/gateways/jsonapi/entity_methods_spec.rb +0 -389
  273. data/spec/lib/flapjack/gateways/jsonapi/medium_methods_spec.rb +0 -231
  274. data/spec/lib/flapjack/gateways/jsonapi/notification_rule_methods_spec.rb +0 -169
  275. data/spec/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods_spec.rb +0 -114
  276. data/spec/lib/flapjack/gateways/jsonapi/report_methods_spec.rb +0 -590
  277. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +0 -249
  278. data/spec/lib/flapjack/gateways/web/views/check.html.erb_spec.rb +0 -21
  279. data/spec/lib/flapjack/gateways/web/views/contact.html.erb_spec.rb +0 -24
  280. data/spec/lib/flapjack/gateways/web/views/index.html.erb_spec.rb +0 -16
  281. data/spec/lib/flapjack/redis_pool_spec.rb +0 -29
  282. data/spec/service_consumers/pacts/flapjack-diner_v1.0.json +0 -4702
  283. data/tasks/entities.rake +0 -151
  284. data/tasks/profile.rake +0 -282
  285. data/tmp/acknowledge.rb +0 -13
  286. data/tmp/create_config_yaml.rb +0 -16
  287. data/tmp/create_event_ok.rb +0 -30
  288. data/tmp/create_event_unknown.rb +0 -30
  289. data/tmp/create_events_failure.rb +0 -34
  290. data/tmp/create_events_ok.rb +0 -32
  291. data/tmp/create_events_ok_fail_ack_ok.rb +0 -53
  292. data/tmp/create_events_ok_failure.rb +0 -41
  293. data/tmp/create_events_ok_failure_ack.rb +0 -53
  294. data/tmp/dummy_contacts.json +0 -43
  295. data/tmp/dummy_entities.json +0 -37
  296. data/tmp/generate_nagios_test_hosts.rb +0 -16
  297. data/tmp/notification_rules.rb +0 -73
  298. data/tmp/parse_config_yaml.rb +0 -7
  299. data/tmp/redis_find_spurious_unknown_states.rb +0 -52
  300. data/tmp/test_json_post.rb +0 -19
  301. data/tmp/test_notification_rules_api.rb +0 -171
@@ -1,1044 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'flapjack/patches'
4
-
5
- require 'flapjack/data/contact'
6
- require 'flapjack/data/event'
7
- require 'flapjack/data/entity'
8
- require 'flapjack/data/tagged'
9
-
10
- #FIXME: Require chronic_duration in the correct place
11
- require 'chronic_duration'
12
-
13
- # TODO might want to split the class methods out to a separate class, DAO pattern
14
- # ( http://en.wikipedia.org/wiki/Data_access_object ).
15
-
16
- module Flapjack
17
-
18
- module Data
19
-
20
- class EntityCheck
21
-
22
- STATE_OK = 'ok'
23
- STATE_WARNING = 'warning'
24
- STATE_CRITICAL = 'critical'
25
- STATE_UNKNOWN = 'unknown'
26
-
27
- NOTIFICATION_STATES = [:problem, :warning, :critical, :unknown,
28
- :recovery, :acknowledgement]
29
-
30
- include Tagged
31
-
32
- attr_accessor :entity, :check
33
-
34
- def self.add(check_data, options = {})
35
- raise "Redis connection not set" unless redis = options[:redis]
36
-
37
- entity_id = check_data['entity_id']
38
- raise "Entity id not provided" if entity_id.nil? || entity_id.empty?
39
-
40
- check_name = check_data['name']
41
- raise "Name not provided" if check_name.nil? || check_name.empty?
42
-
43
- ent = Flapjack::Data::Entity.find_by_id(entity_id, :redis => redis)
44
-
45
- raise "Entity not found for id '#{entity_id}'" if ent.nil?
46
-
47
- logger = options[:logger]
48
- timestamp = Time.now.to_i
49
-
50
- entity_name = ent.name
51
-
52
- redis.zadd("current_checks:#{entity_name}", timestamp, check_name)
53
- redis.zadd('current_entities', timestamp, entity_name)
54
-
55
- c = self.new(ent, check_name, :logger => logger, :timestamp => timestamp,
56
- :redis => redis)
57
- if check_data['tags'] && check_data['tags'].respond_to?(:each)
58
- c.add_tags(*check_data['tags'])
59
- end
60
- c
61
- end
62
-
63
- def self.for_event_id(event_id, options = {})
64
- raise "Redis connection not set" unless redis = options[:redis]
65
- entity_name, check_name = event_id.split(':', 2)
66
- create_entity = options[:create_entity]
67
- logger = options[:logger]
68
- entity = Flapjack::Data::Entity.find_by_name(entity_name,
69
- :create => create_entity, :logger => logger, :redis => redis)
70
- return if entity.nil?
71
- self.new(entity, check_name, :logger => logger, :redis => redis)
72
- end
73
-
74
- def self.for_entity_name(entity_name, check_name, options = {})
75
- raise "Redis connection not set" unless redis = options[:redis]
76
- create_entity = options[:create_entity]
77
- logger = options[:logger]
78
- entity = Flapjack::Data::Entity.find_by_name(entity_name,
79
- :create => create_entity, :logger => logger, :redis => redis)
80
- self.new(entity, check_name, :logger => logger, :redis => redis)
81
- end
82
-
83
- def self.for_entity_id(entity_id, check, options = {})
84
- raise "Redis connection not set" unless redis = options[:redis]
85
- create_entity = options[:create_entity]
86
- logger = options[:logger]
87
- entity = Flapjack::Data::Entity.find_by_id(entity_id,
88
- :create => create_entity, :logger => logger, :redis => redis)
89
- self.new(entity, check, :redis => redis)
90
- end
91
-
92
- def self.for_entity(entity, check, options = {})
93
- raise "Redis connection not set" unless redis = options[:redis]
94
- logger = options[:logger]
95
- self.new(entity, check, :logger => logger, :redis => redis)
96
- end
97
-
98
- def self.all(options = {})
99
- raise "Redis connection not set" unless redis = options[:redis]
100
- redis.zrange("all_checks", 0, -1).collect do |cname|
101
- self.for_event_id(cname, options)
102
- end
103
- end
104
-
105
- def self.find_current_names_for_entity_name(entity_name, options = {})
106
- raise "Redis connection not set" unless redis = options[:redis]
107
- redis.zrange("current_checks:#{entity_name}", 0, -1)
108
- end
109
-
110
- def self.find_current_names(options = {})
111
- raise "Redis connection not set" unless redis = options[:redis]
112
- self.conflate_to_keys(self.find_current_names_by_entity(:redis => redis))
113
- end
114
-
115
- def self.find_current_names_by_entity(options = {})
116
- raise "Redis connection not set" unless redis = options[:redis]
117
- d = {}
118
- redis.zrange("current_entities", 0, -1).each {|entity|
119
- d[entity] = redis.zrange("current_checks:#{entity}", 0, -1)
120
- }
121
- d
122
- end
123
-
124
- def self.count_current(options = {})
125
- raise "Redis connection not set" unless redis = options[:redis]
126
- redis.zrange("current_entities", 0, -1).inject(0) {|memo, entity|
127
- memo + redis.zcount("current_checks:#{entity}", '-inf', '+inf')
128
- }
129
- end
130
-
131
- def self.find_current_names_failing(options = {})
132
- raise "Redis connection not set" unless redis = options[:redis]
133
- self.conflate_to_keys(self.find_current_names_failing_by_entity(:redis => redis))
134
- end
135
-
136
- def self.find_current_names_failing_by_entity(options = {})
137
- raise "Redis connection not set" unless redis = options[:redis]
138
- redis.zrange("failed_checks", 0, -1).inject({}) do |memo, key|
139
- entity, check = key.split(':', 2)
140
- if !!redis.zscore("current_checks:#{entity}", check)
141
- memo[entity] ||= []
142
- memo[entity] << check
143
- end
144
- memo
145
- end
146
- end
147
-
148
- def self.count_current_failing(options = {})
149
- raise "Redis connection not set" unless redis = options[:redis]
150
- redis.zrange("failed_checks", 0, -1).count do |key|
151
- entity, check = key.split(':', 2)
152
- !!redis.zscore("current_checks:#{entity}", check)
153
- end
154
- end
155
-
156
- def self.unacknowledged_failing(options = {})
157
- raise "Redis connection not set" unless redis = options[:redis]
158
-
159
- redis.zrange('failed_checks', '0', '-1').reject {|entity_check|
160
- redis.exists(entity_check + ':unscheduled_maintenance')
161
- }.collect {|entity_check|
162
- Flapjack::Data::EntityCheck.for_event_id(entity_check, :redis => redis)
163
- }.compact
164
- end
165
-
166
- def self.find_maintenance(options = {})
167
- raise "Redis connection not set" unless redis = options[:redis]
168
- type = options[:type]
169
-
170
- checks_with_maints = redis.zrange("all_checks", 0, -1).select do |ec_name|
171
- # not ideal, but redis internals should essentially make this a lot
172
- # of separate hash lookups
173
- redis.exists("#{ec_name}:#{type}_maintenances")
174
- end
175
-
176
- return [] if checks_with_maints.empty?
177
-
178
- entity_re = options[:entity].nil? ? nil : Regexp.new(options[:entity])
179
- check_re = options[:check].nil? ? nil : Regexp.new(options[:check])
180
- reason_re = options[:reason].nil? ? nil : Regexp.new(options[:reason])
181
-
182
- checks_with_maints.inject([]) do |memo, k|
183
- entity, check = k.split(':', 2)
184
- ec = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => redis)
185
-
186
- # Only return entries which match what was passed in
187
- next memo if (options[:state] && (options[:state] != ec.state)) ||
188
- !(entity_re.nil? || entity_re.match(entity)) ||
189
- !(check_re.nil? || check_re.match(check))
190
-
191
- ec.maintenances(nil, nil, type.to_sym => true).each do |window|
192
- next unless (reason_re.nil? || reason_re.match(window[:summary])) &&
193
- check_maintenance_timestamp(options[:started], window[:start_time]) &&
194
- check_maintenance_timestamp(options[:finishing], window[:end_time]) &&
195
- check_maintenance_interval(options[:duration], window[:duration])
196
-
197
- memo << { :entity => entity,
198
- :check => check,
199
- :state => ec.state
200
- }.merge(window)
201
- end
202
-
203
- memo
204
- end
205
- end
206
-
207
- def self.delete_maintenance(options = {})
208
- raise "Redis connection not set" unless redis = options[:redis]
209
- entries = find_maintenance(options)
210
- # Try to delete all entries passed in, but return false if any entries failed
211
- errors = {}
212
- entries.each do |entry|
213
- identifier = "#{entry[:entity]}:#{entry[:check]}:#{entry[:start_time]}"
214
- if entry[:end_time] < Time.now.to_i
215
- errors[identifier] = "Maintenance can't be deleted as it finished in the past"
216
- else
217
- entity = entry[:entity]
218
- check = entry[:check]
219
-
220
- ec = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => redis)
221
- success = case options[:type]
222
- when 'scheduled'
223
- ec.end_scheduled_maintenance(entry[:start_time])
224
- when 'unscheduled'
225
- ec.end_unscheduled_maintenance(entry[:end_time])
226
- end
227
- errors[identifier] = "The following entry failed to delete: #{entry}" unless success
228
- end
229
- end
230
- errors
231
- end
232
-
233
- def self.create_maintenance(options = {})
234
- raise "Redis connection not set" unless redis = options[:redis]
235
- errors = {}
236
- entities = options[:entity].is_a?(String) ? options[:entity].split(',') : options[:entity]
237
- checks = options[:check].is_a?(String) ? options[:check].split(',') : options[:check]
238
- entities.each do |entity|
239
- # Create the entity if it doesn't exist, so we can schedule maintenance against it
240
- Flapjack::Data::Entity.find_by_name(entity, :redis => redis, :create => true)
241
- checks.each do |check|
242
- ec = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => redis)
243
- started = Chronic.parse(options[:started]).to_i
244
- duration = ChronicDuration.parse(options[:duration]).to_i
245
- raise "Failed to parse start time #{options[:started]}" if started == 0
246
- raise"Failed to parse duration #{options[:duration]}" if duration == 0
247
-
248
- success = case options[:type]
249
- when 'scheduled'
250
- ec.create_scheduled_maintenance(started, duration, :summary => options[:reason])
251
- when 'unscheduled'
252
- ec.create_unscheduled_maintenance(started, duration, :summary => options[:reason])
253
- end
254
- identifier = "#{entity}:#{check}:#{started}"
255
- errors[identifier] = "The following check failed to create: #{identifier}" unless success
256
- end
257
- end
258
- errors
259
- end
260
-
261
-
262
- def self.check_maintenance_interval(input, maintenance_duration)
263
- # If no duration was specified, give back all results
264
- return true unless input
265
- inp = input.downcase
266
-
267
- if inp.start_with?('between')
268
- # Between 3 hours and 4 hours translates to more than 3 hours, less than 4 hours
269
- first, last = inp.match(/between (.*) and (.*)/).captures
270
- suffix = last.match(/\w (.*)/) ? last.match(/\w (.*)/).captures.first : ''
271
-
272
- # If the first duration only contains only a single word, the unit is
273
- # most likely directly after the first word of the the second duration
274
- # eg between 3 and 4 hours
275
- first = "#{first} #{suffix}" unless / /.match(first)
276
- raise "Failed to parse #{first}" unless ChronicDuration.parse(first)
277
- raise "Failed to parse #{last}" unless ChronicDuration.parse(last)
278
-
279
- (first, last = last, first) if ChronicDuration.parse(first) > ChronicDuration.parse(last)
280
- return check_maintenance_interval("more than #{first}", maintenance_duration) && check_maintenance_interval("less than #{last}", maintenance_duration)
281
- end
282
-
283
- # ChronicDuration can't parse timestamps for strings starting with before or after.
284
- # Strip the before or after for the conversion only, but use it for the comparison later
285
- ctime = inp.gsub(/^(more than|less than|before|after)/, '')
286
- input_duration = ChronicDuration.parse(ctime, :keep_zero => true)
287
-
288
- raise "Failed to parse time: #{input}" if input_duration.nil?
289
-
290
- case inp
291
- when /^(less than|before)/
292
- maintenance_duration < input_duration
293
- when /^(more than|after)/
294
- maintenance_duration > input_duration
295
- else
296
- maintenance_duration == input_duration
297
- end
298
- end
299
-
300
- def self.check_maintenance_timestamp(input, maintenance_timestamp)
301
- # If no time was specified, give back all results
302
- return true unless input
303
- inp = input.downcase
304
-
305
- # Chronic can't parse timestamps for strings starting with before, after or in some cases, on.
306
- # Strip the before or after for the conversion only, but use it for the comparison later
307
- ctime = inp.gsub(/^(on|before|after)/, '')
308
-
309
- base_time = Time.now
310
-
311
- case inp
312
- # Between 3 and 4 hours ago translates to more than 3 hours ago, less than 4 hours ago
313
- when /^between/
314
- first, last = inp.match(/between (.*) and (.*)/).captures
315
-
316
- # If the first time only contains only a single word, the unit (and past/future) is
317
- # most likely directly after the first word of the the second time
318
- # eg between 3 and 4 hours ago
319
- suffix = last.match(/\w (.*)/) ? last.match(/\w (.*)/).captures.first : ''
320
- first = "#{first} #{suffix}" unless / /.match(first)
321
-
322
- first += ' from now' unless Chronic.parse(first, :now => base_time)
323
- last += ' from now' unless Chronic.parse(last, :now => base_time)
324
- raise "Failed to parse #{first}" unless ChronicDuration.parse(first)
325
- raise "Failed to parse #{last}" unless ChronicDuration.parse(last)
326
-
327
- (first, last = last, first) if Chronic.parse(first, :now => base_time) > Chronic.parse(last, :now => base_time)
328
- (check_maintenance_timestamp("after #{first}", maintenance_timestamp) &&
329
- check_maintenance_timestamp("before #{last}", maintenance_timestamp))
330
- # On 1/1/15. We use Chronic to work out the minimum and maximum timestamp, and use the same behaviour as between.
331
- when /^on/
332
- first = Chronic.parse(ctime, :guess => false, :now => base_time).first
333
- last = Chronic.parse(ctime, :guess => false, :now => base_time).last
334
- (check_maintenance_timestamp("after #{first}", maintenance_timestamp) &&
335
- check_maintenance_timestamp("before #{last}", maintenance_timestamp))
336
- else
337
- # We assume timestamps are rooted against the current time.
338
- # Chronic doesn't always handle this correctly, so we need to handhold it a little
339
- input_timestamp = Chronic.parse(ctime, :keep_zero => true, :now => base_time).to_i
340
- input_timestamp = Chronic.parse(ctime + ' from now', :keep_zero => true, :now => base_time).to_i if input_timestamp == 0
341
-
342
- raise "Failed to parse time: #{input}" if input_timestamp == 0
343
-
344
- case inp
345
- when /^less than/
346
- if input_timestamp < base_time.to_i
347
- maintenance_timestamp > input_timestamp
348
- else
349
- maintenance_timestamp < input_timestamp
350
- end
351
- when /^more than/
352
- # FIXME: and here is the race condition. input timestamp could be in the previous second
353
- # to Time.now due to code execution time:
354
- if input_timestamp < base_time.to_i
355
- maintenance_timestamp < input_timestamp
356
- else
357
- maintenance_timestamp > input_timestamp
358
- end
359
- when /^before/
360
- maintenance_timestamp < input_timestamp
361
- when /^after/
362
- maintenance_timestamp > input_timestamp
363
- end
364
- end
365
- end
366
-
367
- def self.in_unscheduled_maintenance_for_event_id?(event_id, options)
368
- raise "Redis connection not set" unless redis = options[:redis]
369
- redis.exists("#{event_id}:unscheduled_maintenance")
370
- end
371
-
372
- def self.in_scheduled_maintenance_for_event_id?(event_id, options)
373
- raise "Redis connection not set" unless redis = options[:redis]
374
- redis.exists("#{event_id}:scheduled_maintenance")
375
- end
376
-
377
- def self.state_for_event_id?(event_id, options)
378
- raise "Redis connection not set" unless redis = options[:redis]
379
- redis.hget("check:#{event_id}", 'state')
380
- end
381
-
382
- # takes an array of ages (in seconds) to split all checks up by
383
- # - age means how long since the last update
384
- # - 0 age is implied if not explicitly passed
385
- # returns arrays of all current checks hashed by age range upper bound, eg:
386
- #
387
- # EntityCheck.find_all_split_by_freshness([60, 300], opts) =>
388
- # { 0 => [ 'foo-app-01:SSH' ],
389
- # 60 => [ 'foo-app-01:Ping', 'foo-app-01:Disk / Utilisation' ],
390
- # 300 => [] }
391
- #
392
- # you can also set :counts to true in options and you'll just get the counts, eg:
393
- #
394
- # EntityCheck.find_all_split_by_freshness([60, 300], opts.merge(:counts => true)) =>
395
- # { 0 => 1,
396
- # 60 => 3,
397
- # 300 => 0 }
398
- #
399
- # and you can get the last update time with each check too by passing :with_times => true eg:
400
- #
401
- # EntityCheck.find_all_split_by_freshness([60, 300], opts.merge(:with_times => true)) =>
402
- # { 0 => [ ['foo-app-01:SSH', 1382329923.0] ],
403
- # 60 => [ ['foo-app-01:Ping', 1382329922.0], ['foo-app-01:Disk / Utilisation', 1382329921.0] ],
404
- # 300 => [] }
405
- #
406
- def self.find_all_split_by_freshness(ages, options)
407
- raise "Redis connection not set" unless redis = options[:redis]
408
- logger = options[:logger]
409
-
410
- raise "ages does not respond_to? :each and :each_with_index" unless ages.respond_to?(:each) && ages.respond_to?(:each_with_index)
411
- raise "age values must respond_to? :to_i" unless ages.all? {|age| age.respond_to?(:to_i) }
412
-
413
- ages << 0
414
- ages = ages.sort.uniq
415
-
416
- start_time = Time.now
417
-
418
- checks = []
419
- # get all the current checks, with last update time
420
- Flapjack::Data::Entity.all(:enabled => true, :redis => redis).each do |entity|
421
- redis.zrange("current_checks:#{entity.name}", 0, -1, :withscores => true).each do |check, score|
422
- checks << ["#{entity.name}:#{check}", score]
423
- end
424
- end
425
- logger.debug("found #{checks.length} current checks on enabled entities") if logger
426
-
427
- skeleton = ages.inject({}) {|memo, age| memo[age] = [] ; memo }
428
- age_ranges = ages.reverse.each_cons(2)
429
- results_with_times = checks.inject(skeleton) do |memo, check|
430
- check_age = start_time.to_i - check[1]
431
- check_age = 0 unless check_age > 0
432
- if check_age >= ages.last
433
- memo[ages.last] << check
434
- else
435
- age_range = age_ranges.detect {|a, b| check_age < a && check_age >= b }
436
- memo[age_range.last] << check unless age_range.nil?
437
- end
438
- memo
439
- end
440
-
441
- case
442
- when options[:with_times]
443
- results_with_times
444
- when options[:counts]
445
- results_with_times.inject({}) do |memo, (age, checks)|
446
- memo[age] = checks.length
447
- memo
448
- end
449
- else
450
- results_with_times.inject({}) do |memo, (age, checks)|
451
- memo[age] = checks.map { |check| check[0] }
452
- memo
453
- end
454
- end
455
- end
456
-
457
- def entity_name
458
- entity.name
459
- end
460
-
461
- # takes a key "entity:check", returns true if the check is in unscheduled
462
- # maintenance
463
- def in_unscheduled_maintenance?
464
- @redis.exists("#{@key}:unscheduled_maintenance")
465
- end
466
-
467
- # returns true if the check is in scheduled maintenance
468
- def in_scheduled_maintenance?
469
- @redis.exists("#{@key}:scheduled_maintenance")
470
- end
471
-
472
- # return data about current maintenance (scheduled or unscheduled, as specified)
473
- def current_maintenance(opts = {})
474
- sched = opts[:scheduled] ? 'scheduled' : 'unscheduled'
475
- ts = @redis.get("#{@key}:#{sched}_maintenance")
476
- return unless ts
477
- {:start_time => ts.to_i,
478
- :duration => @redis.zscore("#{@key}:#{sched}_maintenances", ts),
479
- :summary => @redis.get("#{@key}:#{ts}:#{sched}_maintenance:summary"),
480
- }
481
- end
482
-
483
- def create_unscheduled_maintenance(start_time, duration, opts = {})
484
- raise ArgumentError, 'start time must be provided as a Unix timestamp' unless start_time && start_time.is_a?(Integer)
485
- raise ArgumentError, 'duration in seconds must be provided' unless duration && duration.is_a?(Integer) && (duration > 0)
486
-
487
- summary = opts[:summary]
488
- time_remaining = (start_time + duration) - Time.now.to_i
489
- if time_remaining > 0
490
- end_unscheduled_maintenance(start_time) if in_unscheduled_maintenance?
491
- @redis.setex("#{@key}:unscheduled_maintenance", time_remaining, start_time)
492
- end
493
- @redis.zadd("#{@key}:unscheduled_maintenances", duration, start_time)
494
- @redis.set("#{@key}:#{start_time}:unscheduled_maintenance:summary", summary)
495
-
496
- @redis.zadd("#{@key}:sorted_unscheduled_maintenance_timestamps", start_time, start_time)
497
- end
498
-
499
- # ends any unscheduled maintenance
500
- def end_unscheduled_maintenance(end_time)
501
- raise ArgumentError, 'end time must be provided as a Unix timestamp' unless end_time && end_time.is_a?(Integer)
502
-
503
- if (um_start = @redis.get("#{@key}:unscheduled_maintenance"))
504
- duration = end_time - um_start.to_i
505
- @logger.debug("ending unscheduled downtime for #{@key} at #{Time.at(end_time).to_s}") if @logger
506
- @redis.zadd("#{@key}:unscheduled_maintenances", duration, um_start) # updates existing UM 'score'
507
- @redis.del("#{@key}:unscheduled_maintenance") == 1
508
- else
509
- @logger.debug("end_unscheduled_maintenance called for #{@key} but none found") if @logger
510
- true
511
- end
512
- end
513
-
514
- # creates a scheduled maintenance period for a check
515
- # TODO: consider adding some validation to the data we're adding in here
516
- # eg start_time is a believable unix timestamp (not in the past and not too
517
- # far in the future), duration is within some bounds...
518
- def create_scheduled_maintenance(start_time, duration, opts = {})
519
- raise ArgumentError, 'start time must be provided as a Unix timestamp' unless start_time && start_time.is_a?(Integer)
520
- raise ArgumentError, 'duration in seconds must be provided' unless duration && duration.is_a?(Integer) && (duration > 0)
521
-
522
- summary = opts[:summary]
523
- @redis.zadd("#{@key}:scheduled_maintenances", duration, start_time)
524
- @redis.set("#{@key}:#{start_time}:scheduled_maintenance:summary", summary)
525
-
526
- @redis.zadd("#{@key}:sorted_scheduled_maintenance_timestamps", start_time, start_time)
527
-
528
- # scheduled maintenance periods have changed, revalidate
529
- update_current_scheduled_maintenance(:revalidate => true)
530
- end
531
-
532
- # if not in scheduled maintenance, looks in scheduled maintenance list for a check to see if
533
- # current state should be set to scheduled maintenance, and sets it as appropriate
534
- def update_current_scheduled_maintenance(opts = {})
535
- if opts[:revalidate]
536
- @redis.del("#{@key}:scheduled_maintenance")
537
- else
538
- return if in_scheduled_maintenance?
539
- end
540
-
541
- # are we within a scheduled maintenance period?
542
- current_time = Time.now.to_i
543
- current_sched_ms = maintenances(nil, nil, :scheduled => true).select {|sm|
544
- (sm[:start_time] <= current_time) && (current_time < sm[:end_time])
545
- }
546
- return if current_sched_ms.empty?
547
-
548
- # yes! so set current scheduled maintenance
549
- # if multiple scheduled maintenances found, find the end_time furthest in the future
550
- most_futuristic = current_sched_ms.max {|sm| sm[:end_time] }
551
- start_time = most_futuristic[:start_time]
552
-
553
- duration = most_futuristic[:end_time] - current_time
554
- if duration > 0
555
- @redis.setex("#{@key}:scheduled_maintenance", duration.to_i, start_time)
556
- end
557
- end
558
-
559
- # TODO allow summary to be changed as part of the termination
560
- def end_scheduled_maintenance(start_time)
561
- raise ArgumentError, 'start time must be supplied as a Unix timestamp' unless start_time && start_time.is_a?(Integer)
562
-
563
- # don't do anything if a scheduled maintenance period with that start time isn't stored
564
- duration = @redis.zscore("#{@key}:scheduled_maintenances", start_time)
565
- return false if duration.nil?
566
-
567
- current_time = Time.now.to_i
568
-
569
- if start_time > current_time
570
- # the scheduled maintenance period (if it exists) is in the future
571
- @redis.del("#{@key}:#{start_time}:scheduled_maintenance:summary")
572
- @redis.zrem("#{@key}:scheduled_maintenances", start_time)
573
-
574
- @redis.zremrangebyscore("#{@key}:sorted_scheduled_maintenance_timestamps", start_time, start_time)
575
-
576
- # scheduled maintenance periods (may) have changed, revalidate
577
- update_current_scheduled_maintenance(:revalidate => true)
578
-
579
- return true
580
- elsif (start_time + duration) > current_time
581
- # it spans the current time, so we'll stop it at that point
582
- new_duration = current_time - start_time
583
- @redis.zadd("#{@key}:scheduled_maintenances", new_duration, start_time)
584
-
585
- # scheduled maintenance periods have changed, revalidate
586
- update_current_scheduled_maintenance(:revalidate => true)
587
-
588
- return true
589
- end
590
-
591
- false
592
- end
593
-
594
- # returns nil if no previous state; this must be considered as a possible
595
- # state by classes using this model
596
- def state
597
- @redis.hget("check:#{@key}", 'state')
598
- end
599
-
600
- def update_state(new_state, options = {})
601
- return unless [STATE_OK, STATE_WARNING,
602
- STATE_CRITICAL, STATE_UNKNOWN].include?(new_state)
603
-
604
- timestamp = options[:timestamp] || Time.now.to_i
605
- summary = options[:summary]
606
- details = options[:details]
607
- perfdata = options[:perfdata]
608
- count = options[:count]
609
- initial_delay = options[:initial_failure_delay]
610
- repeat_delay = options[:repeat_failure_delay]
611
-
612
- old_state = self.state
613
-
614
- @redis.multi do |multi|
615
-
616
- if old_state != new_state
617
-
618
- # Note the current state (for speedy lookups)
619
- multi.hset("check:#{@key}", 'state', new_state)
620
-
621
- # FIXME: rename to last_state_change?
622
- multi.hset("check:#{@key}", 'last_change', timestamp)
623
-
624
- case new_state
625
- when STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN
626
- multi.zadd('failed_checks', timestamp, @key)
627
- # FIXME: Iterate through a list of tags associated with an entity:check pair, and update counters
628
- else
629
- multi.zrem("failed_checks", @key)
630
- # FIXME: Iterate through a list of tags associated with an entity:check pair, and update counters
631
- end
632
-
633
- # Retain event data for entity:check pair
634
- # NB (appending to tail as far as Redis is concerned)
635
- multi.rpush("#{@key}:states", timestamp)
636
- multi.set("#{@key}:#{timestamp}:state", new_state)
637
- multi.set("#{@key}:#{timestamp}:summary", summary) if summary
638
- multi.set("#{@key}:#{timestamp}:details", details) if details
639
- multi.set("#{@key}:#{timestamp}:count", count) if count
640
-
641
- multi.zadd("#{@key}:sorted_state_timestamps", timestamp, timestamp)
642
- end
643
-
644
- # Track when we last saw an event for a particular entity:check pair
645
- # (used to be last_update=, but needs to happen in the multi block)
646
- multi.hset("check:#{@key}", 'last_update', timestamp)
647
- multi.zadd("all_checks", timestamp, @key)
648
- multi.zadd("all_checks:#{entity.name}", timestamp, check)
649
- multi.zadd("current_checks:#{entity.name}", timestamp, check)
650
- multi.zadd('current_entities', timestamp, entity.name)
651
-
652
- # Even if this isn't a state change, we need to update the current state
653
- # hash summary and details (as they may have changed)
654
- multi.hset("check:#{@key}", 'summary', (summary || ''))
655
- multi.hset("check:#{@key}", 'details', (details || ''))
656
-
657
- # NB: delays will revert to defaults if event sources don't continue sending
658
- # through their custom delays in the event structure
659
- multi.hset("check:#{@key}", 'initial_failure_delay', (initial_delay || Flapjack::DEFAULT_INITIAL_FAILURE_DELAY))
660
- multi.hset("check:#{@key}", 'repeat_failure_delay', (repeat_delay || Flapjack::DEFAULT_REPEAT_FAILURE_DELAY))
661
- if perfdata
662
- multi.hset("check:#{@key}", 'perfdata', format_perfdata(perfdata).to_json)
663
- # multi.set("#{@key}:#{timestamp}:perfdata", perfdata)
664
- end
665
-
666
- end
667
- end
668
-
669
- def last_update
670
- lu = @redis.hget("check:#{@key}", 'last_update')
671
- return unless lu && !!(lu =~ /^\d+$/)
672
- lu.to_i
673
- end
674
-
675
- # disables a check (removes currency)
676
- def disable!
677
- timestamp = Time.now.to_i
678
- @logger.debug("disabling check [#{@key}]") if @logger
679
- entity_name = entity.name
680
- @redis.zadd("all_checks", timestamp, @key)
681
- @redis.zadd("all_checks:#{entity_name}", timestamp, check)
682
- @redis.zrem("current_checks:#{entity_name}", check)
683
- if @redis.zcount("current_checks:#{entity_name}", '-inf', '+inf') == 0
684
- @redis.zrem("current_entities", entity.name)
685
- end
686
- end
687
-
688
- def enable!
689
- timestamp = Time.now.to_i
690
- entity_name = entity.name
691
- @redis.zadd("all_checks", timestamp, @key)
692
- @redis.zadd("all_checks:#{entity_name}", timestamp, check)
693
- @redis.zadd("current_checks:#{entity_name}", timestamp, check)
694
- @redis.zadd('current_entities', timestamp, entity_name)
695
- end
696
-
697
- def enabled?
698
- !!@redis.zscore("current_checks:#{entity.name}", check)
699
- end
700
-
701
- def last_change
702
- lc = @redis.hget("check:#{@key}", 'last_change')
703
- return unless lc && !!(lc =~ /^\d+$/)
704
- lc.to_i
705
- end
706
-
707
- def last_notification_for_state(state)
708
- return unless NOTIFICATION_STATES.include?(state)
709
- ln = @redis.get("#{@key}:last_#{state.to_s}_notification")
710
- return {:timestamp => nil, :summary => nil} unless (ln && ln =~ /^\d+$/)
711
- { :timestamp => ln.to_i,
712
- :summary => @redis.get("#{@key}:#{ln.to_i}:summary") }
713
- end
714
-
715
- def last_notifications_of_each_type
716
- NOTIFICATION_STATES.inject({}) do |memo, state|
717
- memo[state] = last_notification_for_state(state) unless (state == :problem)
718
- memo
719
- end
720
- end
721
-
722
- def max_notified_severity_of_current_failure
723
- last_recovery = last_notification_for_state(:recovery)[:timestamp] || 0
724
-
725
- last_critical = last_notification_for_state(:critical)[:timestamp]
726
- return STATE_CRITICAL if last_critical && (last_critical > last_recovery)
727
-
728
- last_warning = last_notification_for_state(:warning)[:timestamp]
729
- return STATE_WARNING if last_warning && (last_warning > last_recovery)
730
-
731
- last_unknown = last_notification_for_state(:unknown)[:timestamp]
732
- return STATE_UNKNOWN if last_unknown && (last_unknown > last_recovery)
733
-
734
- nil
735
- end
736
-
737
- # unpredictable results if there are multiple notifications of different
738
- # types sent at the same time
739
- def last_notification
740
- nils = { :type => nil, :timestamp => nil, :summary => nil }
741
-
742
- lne = last_notifications_of_each_type
743
- ln = lne.delete_if {|type, notif| notif[:timestamp].nil? || notif[:timestamp].to_i <= 0 }
744
- if ln.find {|type, notif| type == :warning or type == :critical}
745
- ln = ln.delete_if {|type, notif| type == :problem }
746
- end
747
- return nils if ln.empty?
748
- lns = ln.sort_by { |type, notif| notif[:timestamp] }.last
749
- { :type => lns[0], :timestamp => lns[1][:timestamp], :summary => lns[1][:summary] }
750
- end
751
-
752
- def event_count_at(timestamp)
753
- eca = @redis.get("#{@key}:#{timestamp}:count")
754
- return unless (eca && eca =~ /^\d+$/)
755
- eca.to_i
756
- end
757
-
758
- def failed?
759
- [STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN].include?( state )
760
- end
761
-
762
- def ok?
763
- [STATE_OK].include?( state )
764
- end
765
-
766
- def summary
767
- @redis.hget("check:#{@key}", 'summary')
768
- end
769
-
770
- def details
771
- @redis.hget("check:#{@key}", 'details')
772
- end
773
-
774
- def perfdata
775
- data = @redis.hget("check:#{@key}", 'perfdata')
776
- begin
777
- data = JSON.parse(data) if data
778
- rescue
779
- data = "Unable to parse string: #{data}"
780
- end
781
-
782
- data = [data] if data.is_a?(Hash)
783
- data
784
- end
785
-
786
- def initial_failure_delay
787
- delay = @redis.hget("check:#{@key}", 'initial_failure_delay')
788
- delay.to_i unless delay.nil?
789
- end
790
-
791
- def repeat_failure_delay
792
- delay = @redis.hget("check:#{@key}", 'repeat_failure_delay')
793
- delay.to_i unless delay.nil?
794
- end
795
-
796
- # Returns a list of states for this entity check, sorted by timestamp.
797
- #
798
- # start_time and end_time should be passed as integer timestamps; these timestamps
799
- # will be considered inclusively, so, e.g. coverage for a day should go
800
- # from midnight to 11:59:59 PM. Pass nil for either end to leave that
801
- # side unbounded.
802
- def historical_states(start_time, end_time, opts = {})
803
- start_time = '-inf' if start_time.to_i <= 0
804
- end_time = '+inf' if end_time.to_i <= 0
805
-
806
- args = ["#{@key}:sorted_state_timestamps"]
807
-
808
- order = opts[:order]
809
- if (order && 'desc'.eql?(order.downcase))
810
- query = :zrevrangebyscore
811
- args += [end_time.to_s, start_time.to_s]
812
- else
813
- query = :zrangebyscore
814
- args += [start_time.to_s, end_time.to_s]
815
- end
816
-
817
- if opts[:limit] && (opts[:limit].to_i > 0)
818
- args << {:limit => [0, opts[:limit]]}
819
- end
820
-
821
- state_ts = @redis.send(query, *args)
822
-
823
- state_data = nil
824
-
825
- @redis.multi do |r|
826
- state_data = state_ts.collect {|ts|
827
- {:timestamp => ts.to_i,
828
- :state => r.get("#{@key}:#{ts}:state"),
829
- :summary => r.get("#{@key}:#{ts}:summary"),
830
- :details => r.get("#{@key}:#{ts}:details"),
831
- # :count => r.get("#{@key}:#{ts}:count"),
832
- # :check_latency => r.get("#{@key}:#{ts}:check_latency")
833
- }
834
- }
835
- end
836
-
837
- # The redis commands in a pipeline block return future objects, which
838
- # must be evaluated. This relies on a patch in flapjack/patches.rb to
839
- # make the Future objects report their class.
840
- state_data.collect {|sd|
841
- sd.merge!(sd) {|k,ov,nv|
842
- (nv.class == Redis::Future) ? nv.value : nv
843
- }
844
- }
845
- end
846
-
847
- # requires a known state timestamp, i.e. probably one returned via
848
- # historical_states. will find the one before that in the sorted set,
849
- # if any.
850
- def historical_state_before(timestamp)
851
- pos = @redis.zrank("#{@key}:sorted_state_timestamps", timestamp)
852
- return if pos.nil? || pos < 1
853
- ts = @redis.zrange("#{@key}:sorted_state_timestamps", pos - 1, pos)
854
- return if ts.nil? || ts.empty?
855
- {:timestamp => ts.first.to_i,
856
- :state => @redis.get("#{@key}:#{ts.first}:state"),
857
- :summary => @redis.get("#{@key}:#{ts.first}:summary"),
858
- :details => @redis.get("#{@key}:#{ts.first}:details")}
859
- end
860
-
861
- # Returns a list of maintenance periods (either unscheduled or scheduled) for this
862
- # entity check, sorted by timestamp.
863
- #
864
- # start_time and end_time should be passed as integer timestamps; these timestamps
865
- # will be considered inclusively, so, e.g. coverage for a day should go
866
- # from midnight to 11:59:59 PM. Pass nil for either end to leave that
867
- # side unbounded.
868
- def maintenances(start_time, end_time, opts = {})
869
- sched = opts[:scheduled] ? 'scheduled' : 'unscheduled'
870
-
871
- start_time ||= '-inf'
872
- end_time ||= '+inf'
873
- order = opts[:order]
874
- query = (order && 'desc'.eql?(order.downcase)) ? :zrevrangebyscore : :zrangebyscore
875
- maint_ts = @redis.send(query, "#{@key}:sorted_#{sched}_maintenance_timestamps", start_time, end_time)
876
-
877
- maint_data = nil
878
-
879
- @redis.multi do |r|
880
- maint_data = maint_ts.collect {|ts|
881
- {:start_time => ts.to_i,
882
- :duration => r.zscore("#{@key}:#{sched}_maintenances", ts),
883
- :summary => r.get("#{@key}:#{ts}:#{sched}_maintenance:summary"),
884
- }
885
- }
886
- end
887
-
888
- # The redis commands in a pipeline block return future objects, which
889
- # must be evaluated. This relies on a patch in flapjack/patches.rb to
890
- # make the Future objects report their class.
891
- maint_data.collect {|md|
892
- md.merge!(md) {|k,ov,nv| (nv.class == Redis::Future) ? nv.value : nv }
893
- md[:end_time] = (md[:start_time] + md[:duration]).floor
894
- md
895
- }
896
- end
897
-
898
- # takes a check, looks up contacts that are interested in this check (or in the check's entity)
899
- # and returns an array of contact records
900
- def contacts
901
- contact_ids = @redis.smembers("contacts_for:#{entity.id}:#{check}")
902
-
903
- if @logger
904
- @logger.debug("#{contact_ids.length} contact(s) for #{entity.id}:#{check}: " +
905
- contact_ids.inspect)
906
- end
907
-
908
- entity.contacts + contact_ids.collect {|c_id|
909
- Flapjack::Data::Contact.find_by_id(c_id, :redis => @redis, :logger => @logger)
910
- }.compact
911
- end
912
-
913
- # override default, which would be 'entity_check_tag'
914
- def tag_prefix
915
- 'check_tag'
916
- end
917
-
918
- def tags_with_entity_and_check_name
919
- tags_without_entity_and_check_name
920
-
921
- # ensure that returned tags include split entity and check words
922
- @tags += @entity.name.split('.', 2).map {|x| x.downcase} +
923
- @check.split(' ').map {|x| x.downcase}
924
-
925
- @tags
926
- end
927
-
928
- alias_method :tags_without_entity_and_check_name, :tags
929
- alias_method :tags, :tags_with_entity_and_check_name
930
-
931
- def ack_hash
932
- @ack_hash ||= @redis.hget('check_hashes_by_id', @key)
933
- if @ack_hash.nil?
934
- sha1 = Digest::SHA1.new
935
- @ack_hash = Digest.hexencode(sha1.digest(@key))[0..7].downcase
936
- @redis.multi do |r|
937
- r.hset("checks_by_hash", @ack_hash, @key)
938
- r.hset("check_hashes_by_id", @key, @ack_hash)
939
- end
940
- end
941
- @ack_hash
942
- end
943
-
944
- def purge_history(opts = {})
945
- t = Time.now
946
- older_than = opts[:older_than] # purge older than this number of seconds ago
947
- raise ":older_than must be supplied" unless older_than
948
-
949
- purge_stamps = historical_states(-1, t.to_i - older_than).map {|s| s[:timestamp]}
950
- unless purge_stamps.empty?
951
- @logger.info "purging #{purge_stamps.length} states from #{@key}" if @logger
952
- deletees = []
953
- purge_stamps.each do |timestamp|
954
- deletees << "#{@key}:#{timestamp}:state"
955
- deletees << "#{@key}:#{timestamp}:summary"
956
- deletees << "#{@key}:#{timestamp}:count"
957
- deletees << "#{@key}:#{timestamp}:check_latency"
958
- end
959
- @logger.info " deleting a bunch of keys 100 at a time..." if @logger
960
- deletees.each_slice(100) do |batch|
961
- @redis.del(batch)
962
- end
963
- @logger.info " removing a range of items from the #{@key}:sorted_state_timestamps sorted set" if @logger
964
- @redis.zremrangebyscore("#{@key}:sorted_state_timestamps", '-inf', t.to_i - older_than)
965
- @logger.info " getting the #{@key}:states list" if @logger
966
- states = @redis.lrange("#{@key}:states", 0, -1)
967
- index = 0
968
- while states[index].to_i < older_than do
969
- index += 1
970
- end
971
- @logger.info " trimming the #{@key}:states from #{index}, length #{states.length}" if @logger
972
- @redis.ltrim("#{@key}:states", index, -1)
973
- end
974
- purge_stamps.length
975
- end
976
-
977
- def self.enabled_for(check_ids, opts = {})
978
- raise "Redis connection not set" unless redis = opts[:redis]
979
-
980
- check_ids.inject([]) do |memo, check_id|
981
- entity_name, check_name = check_id.split(':', 2)
982
- memo << check_id unless redis.zscore("current_checks:#{entity_name}", check_name).nil?
983
- memo
984
- end
985
- end
986
-
987
- def to_jsonapi(opts = {})
988
- json_data = {
989
- "id" => @key,
990
- "name" => @check,
991
- "entity_name" => @entity.name,
992
- "enabled" => opts[:enabled].is_a?(TrueClass),
993
- "tags" => self.tags.to_a,
994
- "links" => {
995
- :entities => opts[:entity_ids] || [],
996
- }
997
- }
998
- Flapjack.dump_json(json_data)
999
- end
1000
-
1001
- private
1002
-
1003
- def initialize(entity, check, options = {})
1004
- raise "Redis connection not set" unless @redis = options[:redis]
1005
- raise "Invalid entity (#{entity.inspect})" unless @entity = Flapjack.sanitize(entity)
1006
- raise "Invalid check (#{check.inspect} on #{entity.inspect})" unless @check = Flapjack.sanitize(check)
1007
- @key = "#{entity.name}:#{check}"
1008
- if @redis.zscore("all_checks", @key).nil?
1009
- timestamp = options[:timestamp] || Time.now.to_i
1010
- @redis.zadd("all_checks", timestamp, @key)
1011
- @redis.zadd("all_checks:#{entity.name}", timestamp, check)
1012
- end
1013
- @logger = options[:logger]
1014
- end
1015
-
1016
- def self.conflate_to_keys(entity_checks_hash)
1017
- entity_checks_hash.inject([]) {|memo, (entity, checks)|
1018
- memo += checks.collect {|check| "#{entity}:#{check}" }
1019
- memo
1020
- }
1021
- end
1022
-
1023
- def format_perfdata(perfdata)
1024
- # example perfdata: time=0.486630s;;;0.000000 size=909B;;;0
1025
- items = perfdata.split(' ')
1026
- # Do some fancy regex
1027
- data = []
1028
- items.each do |item|
1029
- components = item.split '='
1030
- key = components[0].to_s
1031
- value = ""
1032
- if components[1]
1033
- value = components[1].split(';')[0].to_s
1034
- end
1035
- data << {"key" => key, "value" => value}
1036
- end
1037
- data
1038
- end
1039
-
1040
- end
1041
-
1042
- end
1043
-
1044
- end