flapjack 1.6.0 → 2.0.0b1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -6
  3. data/.gitmodules +1 -1
  4. data/.rspec +1 -1
  5. data/.ruby-version +1 -1
  6. data/.travis.yml +12 -13
  7. data/CHANGELOG.md +2 -9
  8. data/CONTRIBUTING.md +7 -2
  9. data/Gemfile +4 -13
  10. data/LICENCE +1 -0
  11. data/README.md +8 -2
  12. data/Rakefile +2 -2
  13. data/bin/flapjack +3 -12
  14. data/build.sh +4 -2
  15. data/etc/flapjack_config.toml.example +273 -0
  16. data/features/ack_after_sched_maint.feature +18 -21
  17. data/features/cli.feature +11 -71
  18. data/features/cli_flapjack-feed-events.feature +14 -15
  19. data/features/cli_flapjack-nagios-receiver.feature +12 -41
  20. data/features/cli_flapper.feature +12 -41
  21. data/features/cli_purge.feature +5 -6
  22. data/features/cli_receive-events.feature +6 -7
  23. data/features/cli_simulate-failed-check.feature +5 -6
  24. data/features/events.feature +206 -181
  25. data/features/events_check_names.feature +4 -7
  26. data/features/notification_rules.feature +144 -223
  27. data/features/notifications.feature +65 -57
  28. data/features/rollup.feature +45 -47
  29. data/features/steps/cli_steps.rb +4 -5
  30. data/features/steps/events_steps.rb +163 -373
  31. data/features/steps/notifications_steps.rb +408 -264
  32. data/features/steps/packaging-lintian_steps.rb +0 -4
  33. data/features/steps/time_travel_steps.rb +0 -26
  34. data/features/support/daemons.rb +6 -31
  35. data/features/support/env.rb +65 -74
  36. data/flapjack.gemspec +22 -24
  37. data/lib/flapjack.rb +14 -7
  38. data/lib/flapjack/cli/flapper.rb +74 -173
  39. data/lib/flapjack/cli/maintenance.rb +278 -109
  40. data/lib/flapjack/cli/migrate.rb +950 -0
  41. data/lib/flapjack/cli/purge.rb +19 -22
  42. data/lib/flapjack/cli/receiver.rb +150 -326
  43. data/lib/flapjack/cli/server.rb +8 -235
  44. data/lib/flapjack/cli/simulate.rb +42 -57
  45. data/lib/flapjack/configuration.rb +51 -37
  46. data/lib/flapjack/coordinator.rb +138 -129
  47. data/lib/flapjack/data/acknowledgement.rb +177 -0
  48. data/lib/flapjack/data/alert.rb +97 -158
  49. data/lib/flapjack/data/check.rb +611 -0
  50. data/lib/flapjack/data/condition.rb +70 -0
  51. data/lib/flapjack/data/contact.rb +226 -456
  52. data/lib/flapjack/data/event.rb +96 -184
  53. data/lib/flapjack/data/extensions/associations.rb +59 -0
  54. data/lib/flapjack/data/extensions/short_name.rb +25 -0
  55. data/lib/flapjack/data/medium.rb +428 -0
  56. data/lib/flapjack/data/metrics.rb +194 -0
  57. data/lib/flapjack/data/notification.rb +22 -281
  58. data/lib/flapjack/data/rule.rb +473 -0
  59. data/lib/flapjack/data/scheduled_maintenance.rb +244 -0
  60. data/lib/flapjack/data/state.rb +221 -0
  61. data/lib/flapjack/data/statistic.rb +112 -0
  62. data/lib/flapjack/data/tag.rb +277 -0
  63. data/lib/flapjack/data/test_notification.rb +182 -0
  64. data/lib/flapjack/data/unscheduled_maintenance.rb +159 -0
  65. data/lib/flapjack/data/validators/id_validator.rb +20 -0
  66. data/lib/flapjack/exceptions.rb +6 -0
  67. data/lib/flapjack/filters/acknowledgement.rb +23 -16
  68. data/lib/flapjack/filters/base.rb +0 -5
  69. data/lib/flapjack/filters/delays.rb +53 -43
  70. data/lib/flapjack/filters/ok.rb +23 -14
  71. data/lib/flapjack/filters/scheduled_maintenance.rb +3 -3
  72. data/lib/flapjack/filters/unscheduled_maintenance.rb +12 -3
  73. data/lib/flapjack/gateways/aws_sns.rb +65 -49
  74. data/lib/flapjack/gateways/aws_sns/alert.text.erb +2 -2
  75. data/lib/flapjack/gateways/aws_sns/alert_subject.text.erb +2 -2
  76. data/lib/flapjack/gateways/aws_sns/rollup_subject.text.erb +1 -1
  77. data/lib/flapjack/gateways/email.rb +107 -90
  78. data/lib/flapjack/gateways/email/alert.html.erb +19 -18
  79. data/lib/flapjack/gateways/email/alert.text.erb +20 -14
  80. data/lib/flapjack/gateways/email/alert_subject.text.erb +2 -1
  81. data/lib/flapjack/gateways/email/rollup.html.erb +14 -13
  82. data/lib/flapjack/gateways/email/rollup.text.erb +13 -10
  83. data/lib/flapjack/gateways/jabber.rb +679 -671
  84. data/lib/flapjack/gateways/jabber/alert.text.erb +9 -6
  85. data/lib/flapjack/gateways/jsonapi.rb +164 -350
  86. data/lib/flapjack/gateways/jsonapi/data/join_descriptor.rb +44 -0
  87. data/lib/flapjack/gateways/jsonapi/data/method_descriptor.rb +21 -0
  88. data/lib/flapjack/gateways/jsonapi/helpers/headers.rb +63 -0
  89. data/lib/flapjack/gateways/jsonapi/helpers/miscellaneous.rb +136 -0
  90. data/lib/flapjack/gateways/jsonapi/helpers/resources.rb +227 -0
  91. data/lib/flapjack/gateways/jsonapi/helpers/serialiser.rb +313 -0
  92. data/lib/flapjack/gateways/jsonapi/helpers/swagger_docs.rb +322 -0
  93. data/lib/flapjack/gateways/jsonapi/methods/association_delete.rb +115 -0
  94. data/lib/flapjack/gateways/jsonapi/methods/association_get.rb +288 -0
  95. data/lib/flapjack/gateways/jsonapi/methods/association_patch.rb +178 -0
  96. data/lib/flapjack/gateways/jsonapi/methods/association_post.rb +116 -0
  97. data/lib/flapjack/gateways/jsonapi/methods/metrics.rb +71 -0
  98. data/lib/flapjack/gateways/jsonapi/methods/resource_delete.rb +119 -0
  99. data/lib/flapjack/gateways/jsonapi/methods/resource_get.rb +186 -0
  100. data/lib/flapjack/gateways/jsonapi/methods/resource_patch.rb +239 -0
  101. data/lib/flapjack/gateways/jsonapi/methods/resource_post.rb +197 -0
  102. data/lib/flapjack/gateways/jsonapi/middleware/array_param_fixer.rb +27 -0
  103. data/lib/flapjack/gateways/jsonapi/{rack → middleware}/json_params_parser.rb +7 -6
  104. data/lib/flapjack/gateways/jsonapi/middleware/request_timestamp.rb +18 -0
  105. data/lib/flapjack/gateways/oobetet.rb +222 -170
  106. data/lib/flapjack/gateways/pager_duty.rb +388 -0
  107. data/lib/flapjack/gateways/pager_duty/alert.text.erb +13 -0
  108. data/lib/flapjack/gateways/slack.rb +56 -48
  109. data/lib/flapjack/gateways/slack/alert.text.erb +1 -1
  110. data/lib/flapjack/gateways/slack/rollup.text.erb +1 -1
  111. data/lib/flapjack/gateways/sms_aspsms.rb +155 -0
  112. data/lib/flapjack/gateways/sms_aspsms/alert.text.erb +7 -0
  113. data/lib/flapjack/gateways/sms_aspsms/rollup.text.erb +2 -0
  114. data/lib/flapjack/gateways/sms_messagenet.rb +77 -57
  115. data/lib/flapjack/gateways/sms_messagenet/alert.text.erb +3 -2
  116. data/lib/flapjack/gateways/sms_nexmo.rb +53 -51
  117. data/lib/flapjack/gateways/sms_nexmo/alert.text.erb +2 -2
  118. data/lib/flapjack/gateways/sms_nexmo/rollup.text.erb +1 -1
  119. data/lib/flapjack/gateways/sms_twilio.rb +79 -62
  120. data/lib/flapjack/gateways/sms_twilio/alert.text.erb +3 -2
  121. data/lib/flapjack/gateways/web.rb +437 -345
  122. data/lib/flapjack/gateways/web/middleware/request_timestamp.rb +18 -0
  123. data/lib/flapjack/gateways/web/public/css/bootstrap.css +3793 -4340
  124. data/lib/flapjack/gateways/web/public/css/bootstrap.css.map +1 -0
  125. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.eot +0 -0
  126. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.svg +273 -214
  127. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.ttf +0 -0
  128. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.woff +0 -0
  129. data/lib/flapjack/gateways/web/public/fonts/glyphicons-halflings-regular.woff2 +0 -0
  130. data/lib/flapjack/gateways/web/public/js/bootstrap.js +1637 -1607
  131. data/lib/flapjack/gateways/web/public/js/self_stats.js +1 -2
  132. data/lib/flapjack/gateways/web/views/_pagination.html.erb +19 -0
  133. data/lib/flapjack/gateways/web/views/check.html.erb +159 -121
  134. data/lib/flapjack/gateways/web/views/checks.html.erb +82 -41
  135. data/lib/flapjack/gateways/web/views/contact.html.erb +59 -71
  136. data/lib/flapjack/gateways/web/views/contacts.html.erb +32 -8
  137. data/lib/flapjack/gateways/web/views/index.html.erb +2 -2
  138. data/lib/flapjack/gateways/web/views/{layout.erb → layout.html.erb} +7 -23
  139. data/lib/flapjack/gateways/web/views/self_stats.html.erb +32 -33
  140. data/lib/flapjack/gateways/web/views/tag.html.erb +32 -0
  141. data/lib/flapjack/gateways/web/views/tags.html.erb +51 -0
  142. data/lib/flapjack/logger.rb +34 -3
  143. data/lib/flapjack/notifier.rb +180 -112
  144. data/lib/flapjack/patches.rb +8 -63
  145. data/lib/flapjack/pikelet.rb +185 -143
  146. data/lib/flapjack/processor.rb +323 -191
  147. data/lib/flapjack/record_queue.rb +33 -0
  148. data/lib/flapjack/redis_proxy.rb +66 -0
  149. data/lib/flapjack/utility.rb +21 -15
  150. data/lib/flapjack/version.rb +2 -1
  151. data/libexec/httpbroker.go +218 -14
  152. data/libexec/oneoff.go +13 -10
  153. data/spec/lib/flapjack/configuration_spec.rb +286 -0
  154. data/spec/lib/flapjack/coordinator_spec.rb +103 -157
  155. data/spec/lib/flapjack/data/check_spec.rb +175 -0
  156. data/spec/lib/flapjack/data/contact_spec.rb +26 -349
  157. data/spec/lib/flapjack/data/event_spec.rb +76 -291
  158. data/spec/lib/flapjack/data/medium_spec.rb +19 -0
  159. data/spec/lib/flapjack/data/rule_spec.rb +43 -0
  160. data/spec/lib/flapjack/data/scheduled_maintenance_spec.rb +976 -0
  161. data/spec/lib/flapjack/data/unscheduled_maintenance_spec.rb +34 -0
  162. data/spec/lib/flapjack/gateways/aws_sns_spec.rb +111 -60
  163. data/spec/lib/flapjack/gateways/email_spec.rb +194 -161
  164. data/spec/lib/flapjack/gateways/jabber_spec.rb +961 -162
  165. data/spec/lib/flapjack/gateways/jsonapi/methods/check_links_spec.rb +155 -0
  166. data/spec/lib/flapjack/gateways/jsonapi/methods/checks_spec.rb +426 -0
  167. data/spec/lib/flapjack/gateways/jsonapi/methods/contact_links_spec.rb +217 -0
  168. data/spec/lib/flapjack/gateways/jsonapi/methods/contacts_spec.rb +425 -0
  169. data/spec/lib/flapjack/gateways/jsonapi/methods/events_spec.rb +271 -0
  170. data/spec/lib/flapjack/gateways/jsonapi/methods/media_spec.rb +257 -0
  171. data/spec/lib/flapjack/gateways/jsonapi/methods/medium_links_spec.rb +163 -0
  172. data/spec/lib/flapjack/gateways/jsonapi/methods/metrics_spec.rb +8 -0
  173. data/spec/lib/flapjack/gateways/jsonapi/methods/rule_links_spec.rb +212 -0
  174. data/spec/lib/flapjack/gateways/jsonapi/methods/rules_spec.rb +289 -0
  175. data/spec/lib/flapjack/gateways/jsonapi/methods/scheduled_maintenance_links_spec.rb +49 -0
  176. data/spec/lib/flapjack/gateways/jsonapi/methods/scheduled_maintenances_spec.rb +242 -0
  177. data/spec/lib/flapjack/gateways/jsonapi/methods/tag_links_spec.rb +274 -0
  178. data/spec/lib/flapjack/gateways/jsonapi/methods/tags_spec.rb +302 -0
  179. data/spec/lib/flapjack/gateways/jsonapi/methods/unscheduled_maintenance_links_spec.rb +49 -0
  180. data/spec/lib/flapjack/gateways/jsonapi/methods/unscheduled_maintenances_spec.rb +339 -0
  181. data/spec/lib/flapjack/gateways/jsonapi_spec.rb +1 -1
  182. data/spec/lib/flapjack/gateways/oobetet_spec.rb +151 -79
  183. data/spec/lib/flapjack/gateways/pager_duty_spec.rb +353 -0
  184. data/spec/lib/flapjack/gateways/slack_spec.rb +53 -53
  185. data/spec/lib/flapjack/gateways/sms_aspsms_spec.rb +106 -0
  186. data/spec/lib/flapjack/gateways/sms_messagenet_spec.rb +111 -54
  187. data/spec/lib/flapjack/gateways/sms_nexmo_spec.rb +50 -51
  188. data/spec/lib/flapjack/gateways/sms_twilio_spec.rb +108 -48
  189. data/spec/lib/flapjack/gateways/web_spec.rb +144 -216
  190. data/spec/lib/flapjack/notifier_spec.rb +132 -1
  191. data/spec/lib/flapjack/pikelet_spec.rb +111 -50
  192. data/spec/lib/flapjack/processor_spec.rb +210 -40
  193. data/spec/lib/flapjack/redis_proxy_spec.rb +45 -0
  194. data/spec/lib/flapjack/utility_spec.rb +11 -15
  195. data/spec/service_consumers/fixture_data.rb +547 -0
  196. data/spec/service_consumers/pact_helper.rb +21 -32
  197. data/spec/service_consumers/pacts/flapjack-diner_v2.0.json +4652 -0
  198. data/spec/service_consumers/provider_states_for_flapjack-diner.rb +279 -322
  199. data/spec/service_consumers/provider_support.rb +8 -0
  200. data/spec/spec_helper.rb +34 -44
  201. data/spec/support/erb_view_helper.rb +1 -1
  202. data/spec/support/factories.rb +58 -0
  203. data/spec/support/jsonapi_helper.rb +15 -26
  204. data/spec/support/mock_logger.rb +43 -0
  205. data/spec/support/xmpp_comparable.rb +24 -0
  206. data/src/flapjack/transport_test.go +30 -1
  207. data/tasks/dump_keys.rake +82 -0
  208. data/tasks/events.rake +7 -7
  209. data/tasks/support/flapjack_config_benchmark.toml +28 -0
  210. data/tasks/support/flapjack_config_benchmark.yaml +0 -2
  211. metadata +175 -222
  212. data/Guardfile +0 -14
  213. data/etc/flapjack_config.yaml.example +0 -477
  214. data/features/cli_flapjack-populator.feature +0 -90
  215. data/features/support/silent_system.rb +0 -4
  216. data/lib/flapjack/cli/import.rb +0 -108
  217. data/lib/flapjack/data/entity.rb +0 -652
  218. data/lib/flapjack/data/entity_check.rb +0 -1044
  219. data/lib/flapjack/data/message.rb +0 -56
  220. data/lib/flapjack/data/migration.rb +0 -234
  221. data/lib/flapjack/data/notification_rule.rb +0 -425
  222. data/lib/flapjack/data/semaphore.rb +0 -44
  223. data/lib/flapjack/data/tagged.rb +0 -48
  224. data/lib/flapjack/gateways/jsonapi/check_methods.rb +0 -206
  225. data/lib/flapjack/gateways/jsonapi/check_presenter.rb +0 -221
  226. data/lib/flapjack/gateways/jsonapi/contact_methods.rb +0 -186
  227. data/lib/flapjack/gateways/jsonapi/entity_methods.rb +0 -223
  228. data/lib/flapjack/gateways/jsonapi/medium_methods.rb +0 -185
  229. data/lib/flapjack/gateways/jsonapi/metrics_methods.rb +0 -132
  230. data/lib/flapjack/gateways/jsonapi/notification_rule_methods.rb +0 -141
  231. data/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods.rb +0 -139
  232. data/lib/flapjack/gateways/jsonapi/report_methods.rb +0 -146
  233. data/lib/flapjack/gateways/pagerduty.rb +0 -318
  234. data/lib/flapjack/gateways/pagerduty/alert.text.erb +0 -10
  235. data/lib/flapjack/gateways/web/public/css/select2-bootstrap.css +0 -87
  236. data/lib/flapjack/gateways/web/public/css/select2.css +0 -615
  237. data/lib/flapjack/gateways/web/public/css/tablesort.css +0 -67
  238. data/lib/flapjack/gateways/web/public/img/select2-spinner.gif +0 -0
  239. data/lib/flapjack/gateways/web/public/img/select2.png +0 -0
  240. data/lib/flapjack/gateways/web/public/img/select2x2.png +0 -0
  241. data/lib/flapjack/gateways/web/public/js/backbone.js +0 -1581
  242. data/lib/flapjack/gateways/web/public/js/backbone.jsonapi.js +0 -322
  243. data/lib/flapjack/gateways/web/public/js/flapjack.js +0 -82
  244. data/lib/flapjack/gateways/web/public/js/jquery.tablesorter.js +0 -1640
  245. data/lib/flapjack/gateways/web/public/js/jquery.tablesorter.widgets.js +0 -1390
  246. data/lib/flapjack/gateways/web/public/js/modules/contact.js +0 -520
  247. data/lib/flapjack/gateways/web/public/js/modules/entity.js +0 -28
  248. data/lib/flapjack/gateways/web/public/js/modules/medium.js +0 -40
  249. data/lib/flapjack/gateways/web/public/js/select2.js +0 -3397
  250. data/lib/flapjack/gateways/web/public/js/tablesort.js +0 -44
  251. data/lib/flapjack/gateways/web/public/js/underscore.js +0 -1276
  252. data/lib/flapjack/gateways/web/views/edit_contacts.html.erb +0 -173
  253. data/lib/flapjack/gateways/web/views/entities.html.erb +0 -30
  254. data/lib/flapjack/gateways/web/views/entity.html.erb +0 -51
  255. data/lib/flapjack/rack_logger.rb +0 -47
  256. data/lib/flapjack/redis_pool.rb +0 -42
  257. data/spec/lib/flapjack/data/entity_check_spec.rb +0 -1418
  258. data/spec/lib/flapjack/data/entity_spec.rb +0 -872
  259. data/spec/lib/flapjack/data/message_spec.rb +0 -30
  260. data/spec/lib/flapjack/data/migration_spec.rb +0 -104
  261. data/spec/lib/flapjack/data/notification_rule_spec.rb +0 -232
  262. data/spec/lib/flapjack/data/notification_spec.rb +0 -53
  263. data/spec/lib/flapjack/data/semaphore_spec.rb +0 -24
  264. data/spec/lib/flapjack/filters/acknowledgement_spec.rb +0 -6
  265. data/spec/lib/flapjack/filters/delays_spec.rb +0 -6
  266. data/spec/lib/flapjack/filters/ok_spec.rb +0 -6
  267. data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +0 -6
  268. data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +0 -6
  269. data/spec/lib/flapjack/gateways/jsonapi/check_methods_spec.rb +0 -315
  270. data/spec/lib/flapjack/gateways/jsonapi/check_presenter_spec.rb +0 -223
  271. data/spec/lib/flapjack/gateways/jsonapi/contact_methods_spec.rb +0 -131
  272. data/spec/lib/flapjack/gateways/jsonapi/entity_methods_spec.rb +0 -389
  273. data/spec/lib/flapjack/gateways/jsonapi/medium_methods_spec.rb +0 -231
  274. data/spec/lib/flapjack/gateways/jsonapi/notification_rule_methods_spec.rb +0 -169
  275. data/spec/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods_spec.rb +0 -114
  276. data/spec/lib/flapjack/gateways/jsonapi/report_methods_spec.rb +0 -590
  277. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +0 -249
  278. data/spec/lib/flapjack/gateways/web/views/check.html.erb_spec.rb +0 -21
  279. data/spec/lib/flapjack/gateways/web/views/contact.html.erb_spec.rb +0 -24
  280. data/spec/lib/flapjack/gateways/web/views/index.html.erb_spec.rb +0 -16
  281. data/spec/lib/flapjack/redis_pool_spec.rb +0 -29
  282. data/spec/service_consumers/pacts/flapjack-diner_v1.0.json +0 -4702
  283. data/tasks/entities.rake +0 -151
  284. data/tasks/profile.rake +0 -282
  285. data/tmp/acknowledge.rb +0 -13
  286. data/tmp/create_config_yaml.rb +0 -16
  287. data/tmp/create_event_ok.rb +0 -30
  288. data/tmp/create_event_unknown.rb +0 -30
  289. data/tmp/create_events_failure.rb +0 -34
  290. data/tmp/create_events_ok.rb +0 -32
  291. data/tmp/create_events_ok_fail_ack_ok.rb +0 -53
  292. data/tmp/create_events_ok_failure.rb +0 -41
  293. data/tmp/create_events_ok_failure_ack.rb +0 -53
  294. data/tmp/dummy_contacts.json +0 -43
  295. data/tmp/dummy_entities.json +0 -37
  296. data/tmp/generate_nagios_test_hosts.rb +0 -16
  297. data/tmp/notification_rules.rb +0 -73
  298. data/tmp/parse_config_yaml.rb +0 -7
  299. data/tmp/redis_find_spurious_unknown_states.rb +0 -52
  300. data/tmp/test_json_post.rb +0 -19
  301. data/tmp/test_notification_rules_api.rb +0 -171
@@ -1,1044 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'flapjack/patches'
4
-
5
- require 'flapjack/data/contact'
6
- require 'flapjack/data/event'
7
- require 'flapjack/data/entity'
8
- require 'flapjack/data/tagged'
9
-
10
- #FIXME: Require chronic_duration in the correct place
11
- require 'chronic_duration'
12
-
13
- # TODO might want to split the class methods out to a separate class, DAO pattern
14
- # ( http://en.wikipedia.org/wiki/Data_access_object ).
15
-
16
- module Flapjack
17
-
18
- module Data
19
-
20
- class EntityCheck
21
-
22
- STATE_OK = 'ok'
23
- STATE_WARNING = 'warning'
24
- STATE_CRITICAL = 'critical'
25
- STATE_UNKNOWN = 'unknown'
26
-
27
- NOTIFICATION_STATES = [:problem, :warning, :critical, :unknown,
28
- :recovery, :acknowledgement]
29
-
30
- include Tagged
31
-
32
- attr_accessor :entity, :check
33
-
34
- def self.add(check_data, options = {})
35
- raise "Redis connection not set" unless redis = options[:redis]
36
-
37
- entity_id = check_data['entity_id']
38
- raise "Entity id not provided" if entity_id.nil? || entity_id.empty?
39
-
40
- check_name = check_data['name']
41
- raise "Name not provided" if check_name.nil? || check_name.empty?
42
-
43
- ent = Flapjack::Data::Entity.find_by_id(entity_id, :redis => redis)
44
-
45
- raise "Entity not found for id '#{entity_id}'" if ent.nil?
46
-
47
- logger = options[:logger]
48
- timestamp = Time.now.to_i
49
-
50
- entity_name = ent.name
51
-
52
- redis.zadd("current_checks:#{entity_name}", timestamp, check_name)
53
- redis.zadd('current_entities', timestamp, entity_name)
54
-
55
- c = self.new(ent, check_name, :logger => logger, :timestamp => timestamp,
56
- :redis => redis)
57
- if check_data['tags'] && check_data['tags'].respond_to?(:each)
58
- c.add_tags(*check_data['tags'])
59
- end
60
- c
61
- end
62
-
63
- def self.for_event_id(event_id, options = {})
64
- raise "Redis connection not set" unless redis = options[:redis]
65
- entity_name, check_name = event_id.split(':', 2)
66
- create_entity = options[:create_entity]
67
- logger = options[:logger]
68
- entity = Flapjack::Data::Entity.find_by_name(entity_name,
69
- :create => create_entity, :logger => logger, :redis => redis)
70
- return if entity.nil?
71
- self.new(entity, check_name, :logger => logger, :redis => redis)
72
- end
73
-
74
- def self.for_entity_name(entity_name, check_name, options = {})
75
- raise "Redis connection not set" unless redis = options[:redis]
76
- create_entity = options[:create_entity]
77
- logger = options[:logger]
78
- entity = Flapjack::Data::Entity.find_by_name(entity_name,
79
- :create => create_entity, :logger => logger, :redis => redis)
80
- self.new(entity, check_name, :logger => logger, :redis => redis)
81
- end
82
-
83
- def self.for_entity_id(entity_id, check, options = {})
84
- raise "Redis connection not set" unless redis = options[:redis]
85
- create_entity = options[:create_entity]
86
- logger = options[:logger]
87
- entity = Flapjack::Data::Entity.find_by_id(entity_id,
88
- :create => create_entity, :logger => logger, :redis => redis)
89
- self.new(entity, check, :redis => redis)
90
- end
91
-
92
- def self.for_entity(entity, check, options = {})
93
- raise "Redis connection not set" unless redis = options[:redis]
94
- logger = options[:logger]
95
- self.new(entity, check, :logger => logger, :redis => redis)
96
- end
97
-
98
- def self.all(options = {})
99
- raise "Redis connection not set" unless redis = options[:redis]
100
- redis.zrange("all_checks", 0, -1).collect do |cname|
101
- self.for_event_id(cname, options)
102
- end
103
- end
104
-
105
- def self.find_current_names_for_entity_name(entity_name, options = {})
106
- raise "Redis connection not set" unless redis = options[:redis]
107
- redis.zrange("current_checks:#{entity_name}", 0, -1)
108
- end
109
-
110
- def self.find_current_names(options = {})
111
- raise "Redis connection not set" unless redis = options[:redis]
112
- self.conflate_to_keys(self.find_current_names_by_entity(:redis => redis))
113
- end
114
-
115
- def self.find_current_names_by_entity(options = {})
116
- raise "Redis connection not set" unless redis = options[:redis]
117
- d = {}
118
- redis.zrange("current_entities", 0, -1).each {|entity|
119
- d[entity] = redis.zrange("current_checks:#{entity}", 0, -1)
120
- }
121
- d
122
- end
123
-
124
- def self.count_current(options = {})
125
- raise "Redis connection not set" unless redis = options[:redis]
126
- redis.zrange("current_entities", 0, -1).inject(0) {|memo, entity|
127
- memo + redis.zcount("current_checks:#{entity}", '-inf', '+inf')
128
- }
129
- end
130
-
131
- def self.find_current_names_failing(options = {})
132
- raise "Redis connection not set" unless redis = options[:redis]
133
- self.conflate_to_keys(self.find_current_names_failing_by_entity(:redis => redis))
134
- end
135
-
136
- def self.find_current_names_failing_by_entity(options = {})
137
- raise "Redis connection not set" unless redis = options[:redis]
138
- redis.zrange("failed_checks", 0, -1).inject({}) do |memo, key|
139
- entity, check = key.split(':', 2)
140
- if !!redis.zscore("current_checks:#{entity}", check)
141
- memo[entity] ||= []
142
- memo[entity] << check
143
- end
144
- memo
145
- end
146
- end
147
-
148
- def self.count_current_failing(options = {})
149
- raise "Redis connection not set" unless redis = options[:redis]
150
- redis.zrange("failed_checks", 0, -1).count do |key|
151
- entity, check = key.split(':', 2)
152
- !!redis.zscore("current_checks:#{entity}", check)
153
- end
154
- end
155
-
156
- def self.unacknowledged_failing(options = {})
157
- raise "Redis connection not set" unless redis = options[:redis]
158
-
159
- redis.zrange('failed_checks', '0', '-1').reject {|entity_check|
160
- redis.exists(entity_check + ':unscheduled_maintenance')
161
- }.collect {|entity_check|
162
- Flapjack::Data::EntityCheck.for_event_id(entity_check, :redis => redis)
163
- }.compact
164
- end
165
-
166
- def self.find_maintenance(options = {})
167
- raise "Redis connection not set" unless redis = options[:redis]
168
- type = options[:type]
169
-
170
- checks_with_maints = redis.zrange("all_checks", 0, -1).select do |ec_name|
171
- # not ideal, but redis internals should essentially make this a lot
172
- # of separate hash lookups
173
- redis.exists("#{ec_name}:#{type}_maintenances")
174
- end
175
-
176
- return [] if checks_with_maints.empty?
177
-
178
- entity_re = options[:entity].nil? ? nil : Regexp.new(options[:entity])
179
- check_re = options[:check].nil? ? nil : Regexp.new(options[:check])
180
- reason_re = options[:reason].nil? ? nil : Regexp.new(options[:reason])
181
-
182
- checks_with_maints.inject([]) do |memo, k|
183
- entity, check = k.split(':', 2)
184
- ec = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => redis)
185
-
186
- # Only return entries which match what was passed in
187
- next memo if (options[:state] && (options[:state] != ec.state)) ||
188
- !(entity_re.nil? || entity_re.match(entity)) ||
189
- !(check_re.nil? || check_re.match(check))
190
-
191
- ec.maintenances(nil, nil, type.to_sym => true).each do |window|
192
- next unless (reason_re.nil? || reason_re.match(window[:summary])) &&
193
- check_maintenance_timestamp(options[:started], window[:start_time]) &&
194
- check_maintenance_timestamp(options[:finishing], window[:end_time]) &&
195
- check_maintenance_interval(options[:duration], window[:duration])
196
-
197
- memo << { :entity => entity,
198
- :check => check,
199
- :state => ec.state
200
- }.merge(window)
201
- end
202
-
203
- memo
204
- end
205
- end
206
-
207
- def self.delete_maintenance(options = {})
208
- raise "Redis connection not set" unless redis = options[:redis]
209
- entries = find_maintenance(options)
210
- # Try to delete all entries passed in, but return false if any entries failed
211
- errors = {}
212
- entries.each do |entry|
213
- identifier = "#{entry[:entity]}:#{entry[:check]}:#{entry[:start_time]}"
214
- if entry[:end_time] < Time.now.to_i
215
- errors[identifier] = "Maintenance can't be deleted as it finished in the past"
216
- else
217
- entity = entry[:entity]
218
- check = entry[:check]
219
-
220
- ec = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => redis)
221
- success = case options[:type]
222
- when 'scheduled'
223
- ec.end_scheduled_maintenance(entry[:start_time])
224
- when 'unscheduled'
225
- ec.end_unscheduled_maintenance(entry[:end_time])
226
- end
227
- errors[identifier] = "The following entry failed to delete: #{entry}" unless success
228
- end
229
- end
230
- errors
231
- end
232
-
233
- def self.create_maintenance(options = {})
234
- raise "Redis connection not set" unless redis = options[:redis]
235
- errors = {}
236
- entities = options[:entity].is_a?(String) ? options[:entity].split(',') : options[:entity]
237
- checks = options[:check].is_a?(String) ? options[:check].split(',') : options[:check]
238
- entities.each do |entity|
239
- # Create the entity if it doesn't exist, so we can schedule maintenance against it
240
- Flapjack::Data::Entity.find_by_name(entity, :redis => redis, :create => true)
241
- checks.each do |check|
242
- ec = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => redis)
243
- started = Chronic.parse(options[:started]).to_i
244
- duration = ChronicDuration.parse(options[:duration]).to_i
245
- raise "Failed to parse start time #{options[:started]}" if started == 0
246
- raise"Failed to parse duration #{options[:duration]}" if duration == 0
247
-
248
- success = case options[:type]
249
- when 'scheduled'
250
- ec.create_scheduled_maintenance(started, duration, :summary => options[:reason])
251
- when 'unscheduled'
252
- ec.create_unscheduled_maintenance(started, duration, :summary => options[:reason])
253
- end
254
- identifier = "#{entity}:#{check}:#{started}"
255
- errors[identifier] = "The following check failed to create: #{identifier}" unless success
256
- end
257
- end
258
- errors
259
- end
260
-
261
-
262
- def self.check_maintenance_interval(input, maintenance_duration)
263
- # If no duration was specified, give back all results
264
- return true unless input
265
- inp = input.downcase
266
-
267
- if inp.start_with?('between')
268
- # Between 3 hours and 4 hours translates to more than 3 hours, less than 4 hours
269
- first, last = inp.match(/between (.*) and (.*)/).captures
270
- suffix = last.match(/\w (.*)/) ? last.match(/\w (.*)/).captures.first : ''
271
-
272
- # If the first duration only contains only a single word, the unit is
273
- # most likely directly after the first word of the the second duration
274
- # eg between 3 and 4 hours
275
- first = "#{first} #{suffix}" unless / /.match(first)
276
- raise "Failed to parse #{first}" unless ChronicDuration.parse(first)
277
- raise "Failed to parse #{last}" unless ChronicDuration.parse(last)
278
-
279
- (first, last = last, first) if ChronicDuration.parse(first) > ChronicDuration.parse(last)
280
- return check_maintenance_interval("more than #{first}", maintenance_duration) && check_maintenance_interval("less than #{last}", maintenance_duration)
281
- end
282
-
283
- # ChronicDuration can't parse timestamps for strings starting with before or after.
284
- # Strip the before or after for the conversion only, but use it for the comparison later
285
- ctime = inp.gsub(/^(more than|less than|before|after)/, '')
286
- input_duration = ChronicDuration.parse(ctime, :keep_zero => true)
287
-
288
- raise "Failed to parse time: #{input}" if input_duration.nil?
289
-
290
- case inp
291
- when /^(less than|before)/
292
- maintenance_duration < input_duration
293
- when /^(more than|after)/
294
- maintenance_duration > input_duration
295
- else
296
- maintenance_duration == input_duration
297
- end
298
- end
299
-
300
# Tests a maintenance timestamp against a human-readable time filter such as
# "before 3 hours ago", "more than 1 day ago", "on 1/1/15" or
# "between 3 and 4 hours ago".
#
# input                 - the filter String, or nil for "no filter"
# maintenance_timestamp - the timestamp being tested; compared numerically
#                         with the time parsed from the filter
#
# Returns true when no filter is given, otherwise the result of the
# comparison (nil if the filter doesn't start with a recognised keyword).
# Raises RuntimeError if the filter can't be parsed.
def self.check_maintenance_timestamp(input, maintenance_timestamp)
  # If no time was specified, give back all results
  return true unless input
  inp = input.downcase

  # Chronic can't parse timestamps for strings starting with before, after or
  # in some cases, on. Strip the keyword for the conversion only, but use it
  # for the comparison later.
  ctime = inp.gsub(/^(on|before|after)/, '')

  base_time = Time.now

  case inp
  # "between 3 and 4 hours ago" means more than 3 hours ago AND less than 4 hours ago
  when /^between/
    bounds = inp.match(/between (.*) and (.*)/)
    # without the guard a missing "and" surfaced as NoMethodError on nil
    raise "Failed to parse time: #{input}" unless bounds
    first, last = bounds.captures

    # If the first time contains only a single word, the unit (and past/future)
    # is most likely directly after the first word of the second time,
    # eg between 3 and 4 hours ago
    suffix_match = last.match(/\w (.*)/)
    suffix = suffix_match ? suffix_match.captures.first : ''
    first = "#{first} #{suffix}" unless / /.match(first)

    first += ' from now' unless Chronic.parse(first, :now => base_time)
    last += ' from now' unless Chronic.parse(last, :now => base_time)
    raise "Failed to parse #{first}" unless ChronicDuration.parse(first)
    raise "Failed to parse #{last}" unless ChronicDuration.parse(last)

    first_time = Chronic.parse(first, :now => base_time)
    last_time = Chronic.parse(last, :now => base_time)
    raise "Failed to parse #{first}" unless first_time
    raise "Failed to parse #{last}" unless last_time

    # order the bounds chronologically before delegating
    first, last = last, first if first_time > last_time
    (check_maintenance_timestamp("after #{first}", maintenance_timestamp) &&
      check_maintenance_timestamp("before #{last}", maintenance_timestamp))
  # "on 1/1/15": Chronic works out the minimum and maximum timestamp of the
  # period, which are then handled the same way as "between".
  when /^on/
    span = Chronic.parse(ctime, :guess => false, :now => base_time)
    raise "Failed to parse time: #{input}" unless span
    (check_maintenance_timestamp("after #{span.first}", maintenance_timestamp) &&
      check_maintenance_timestamp("before #{span.last}", maintenance_timestamp))
  else
    # We assume timestamps are rooted against the current time.
    # Chronic doesn't always handle this correctly, so we need to handhold it a little
    input_timestamp = Chronic.parse(ctime, :keep_zero => true, :now => base_time).to_i
    input_timestamp = Chronic.parse(ctime + ' from now', :keep_zero => true, :now => base_time).to_i if input_timestamp == 0

    raise "Failed to parse time: #{input}" if input_timestamp == 0

    case inp
    when /^less than/
      if input_timestamp < base_time.to_i
        maintenance_timestamp > input_timestamp
      else
        maintenance_timestamp < input_timestamp
      end
    when /^more than/
      # FIXME: and here is the race condition. input timestamp could be in the
      # previous second to Time.now due to code execution time:
      if input_timestamp < base_time.to_i
        maintenance_timestamp < input_timestamp
      else
        maintenance_timestamp > input_timestamp
      end
    when /^before/
      maintenance_timestamp < input_timestamp
    when /^after/
      maintenance_timestamp > input_timestamp
    end
  end
end
366
-
367
# Asks Redis whether the "<event_id>:unscheduled_maintenance" key exists.
#
# options - Hash that must carry the Redis connection under :redis
#
# Raises RuntimeError when no connection is supplied.
def self.in_unscheduled_maintenance_for_event_id?(event_id, options)
  redis = options[:redis]
  raise "Redis connection not set" unless redis
  marker_key = "#{event_id}:unscheduled_maintenance"
  redis.exists(marker_key)
end
371
-
372
# Asks Redis whether the "<event_id>:scheduled_maintenance" key exists.
#
# options - Hash that must carry the Redis connection under :redis
#
# Raises RuntimeError when no connection is supplied.
def self.in_scheduled_maintenance_for_event_id?(event_id, options)
  redis = options[:redis]
  raise "Redis connection not set" unless redis
  marker_key = "#{event_id}:scheduled_maintenance"
  redis.exists(marker_key)
end
376
-
377
# Reads the 'state' field of the "check:<event_id>" hash from Redis.
# Despite the trailing '?', this returns the stored value, not a boolean.
#
# options - Hash that must carry the Redis connection under :redis
#
# Raises RuntimeError when no connection is supplied.
def self.state_for_event_id?(event_id, options)
  redis = options[:redis]
  raise "Redis connection not set" unless redis
  redis.hget("check:#{event_id}", 'state')
end
381
-
382
# takes an array of ages (in seconds) to split all checks up by
# - age means how long since the last update
# - 0 age is implied if not explicitly passed
# returns arrays of all current checks hashed by age range lower bound (each
# check is filed under the largest age it has reached), eg:
#
#     EntityCheck.find_all_split_by_freshness([60, 300], opts) =>
#       {   0 => [ 'foo-app-01:SSH' ],
#          60 => [ 'foo-app-01:Ping', 'foo-app-01:Disk / Utilisation' ],
#         300 => [] }
#
# you can also set :counts to true in options and you'll just get the counts, eg:
#
#     EntityCheck.find_all_split_by_freshness([60, 300], opts.merge(:counts => true)) =>
#       {   0 => 1,
#          60 => 2,
#         300 => 0 }
#
# and you can get the last update time with each check too by passing :with_times => true eg:
#
#     EntityCheck.find_all_split_by_freshness([60, 300], opts.merge(:with_times => true)) =>
#       {   0 => [ ['foo-app-01:SSH', 1382329923.0] ],
#          60 => [ ['foo-app-01:Ping', 1382329922.0], ['foo-app-01:Disk / Utilisation', 1382329921.0] ],
#         300 => [] }
#
# options must include :redis and may include :logger. The ages collection
# passed in is left unmodified. Raises RuntimeError on a missing connection
# or unusable ages.
def self.find_all_split_by_freshness(ages, options)
  raise "Redis connection not set" unless redis = options[:redis]
  logger = options[:logger]

  raise "ages does not respond_to? :each and :each_with_index" unless ages.respond_to?(:each) && ages.respond_to?(:each_with_index)
  raise "age values must respond_to? :to_i" unless ages.all? {|age| age.respond_to?(:to_i) }

  # build a new sorted, de-duplicated list (with the implied 0) rather than
  # appending to the caller's collection
  ages = (ages.to_a + [0]).sort.uniq
  descending_ages = ages.reverse

  start_time = Time.now

  # get all the current checks, with last update time
  checks = []
  Flapjack::Data::Entity.all(:enabled => true, :redis => redis).each do |entity|
    redis.zrange("current_checks:#{entity.name}", 0, -1, :withscores => true).each do |check, score|
      checks << ["#{entity.name}:#{check}", score]
    end
  end
  logger.debug("found #{checks.length} current checks on enabled entities") if logger

  results_with_times = ages.each_with_object({}) {|age, memo| memo[age] = [] }
  checks.each do |check|
    check_age = start_time.to_i - check[1]
    check_age = 0 unless check_age > 0
    # the first age (largest first) that the check has reached; 0 always matches
    bucket = descending_ages.detect {|age| check_age >= age }
    results_with_times[bucket] << check unless bucket.nil?
  end

  if options[:with_times]
    results_with_times
  elsif options[:counts]
    results_with_times.each_with_object({}) do |(age, aged_checks), memo|
      memo[age] = aged_checks.length
    end
  else
    results_with_times.each_with_object({}) do |(age, aged_checks), memo|
      memo[age] = aged_checks.map {|aged_check| aged_check[0] }
    end
  end
end
456
-
457
# Convenience accessor: the name of the entity this check belongs to.
def entity_name
  owner = entity
  owner.name
end
460
-
461
# Asks Redis whether the "<key>:unscheduled_maintenance" marker exists for
# this check (@key is the check's "entity:check" identifier).
def in_unscheduled_maintenance?
  marker_key = "#{@key}:unscheduled_maintenance"
  @redis.exists(marker_key)
end
466
-
467
# Asks Redis whether the "<key>:scheduled_maintenance" marker exists for
# this check.
def in_scheduled_maintenance?
  marker_key = "#{@key}:scheduled_maintenance"
  @redis.exists(marker_key)
end
471
-
472
# Describes the maintenance period currently in effect, if any.
#
# opts - :scheduled => truthy selects scheduled maintenance; otherwise
#        unscheduled maintenance is looked up
#
# Returns nil when no current maintenance marker is stored, otherwise a Hash
# with :start_time (Integer), :duration and :summary as read from Redis.
def current_maintenance(opts = {})
  kind = opts[:scheduled] ? 'scheduled' : 'unscheduled'
  started_at = @redis.get("#{@key}:#{kind}_maintenance")
  return unless started_at

  info = {}
  info[:start_time] = started_at.to_i
  info[:duration] = @redis.zscore("#{@key}:#{kind}_maintenances", started_at)
  info[:summary] = @redis.get("#{@key}:#{started_at}:#{kind}_maintenance:summary")
  info
end
482
-
483
# Records an unscheduled maintenance period for this check.
#
# start_time - Integer Unix timestamp
# duration   - positive Integer number of seconds
# opts       - :summary => text stored alongside the period
#
# When the period has not yet expired, any unscheduled maintenance already in
# effect is ended first and a marker key that expires with the period is set.
# Raises ArgumentError for a non-Integer start time or a non-positive /
# non-Integer duration.
def create_unscheduled_maintenance(start_time, duration, opts = {})
  unless start_time.is_a?(Integer)
    raise ArgumentError, 'start time must be provided as a Unix timestamp'
  end
  unless duration.is_a?(Integer) && (duration > 0)
    raise ArgumentError, 'duration in seconds must be provided'
  end

  seconds_left = (start_time + duration) - Time.now.to_i
  if seconds_left > 0
    end_unscheduled_maintenance(start_time) if in_unscheduled_maintenance?
    @redis.setex("#{@key}:unscheduled_maintenance", seconds_left, start_time)
  end

  @redis.zadd("#{@key}:unscheduled_maintenances", duration, start_time)
  @redis.set("#{@key}:#{start_time}:unscheduled_maintenance:summary", opts[:summary])

  @redis.zadd("#{@key}:sorted_unscheduled_maintenance_timestamps", start_time, start_time)
end
498
-
499
# Ends the unscheduled maintenance currently in effect, if there is one.
#
# end_time - Integer Unix timestamp at which the maintenance stops
#
# The stored duration of the current period is rewritten so that it finishes
# at end_time, then the marker key is deleted.
#
# Returns true when nothing was in effect, otherwise whether exactly one
# marker key was deleted. Raises ArgumentError for a non-Integer end time.
def end_unscheduled_maintenance(end_time)
  unless end_time.is_a?(Integer)
    raise ArgumentError, 'end time must be provided as a Unix timestamp'
  end

  marker_key = "#{@key}:unscheduled_maintenance"
  um_start = @redis.get(marker_key)

  unless um_start
    @logger.debug("end_unscheduled_maintenance called for #{@key} but none found") if @logger
    return true
  end

  shortened = end_time - um_start.to_i
  @logger.debug("ending unscheduled downtime for #{@key} at #{Time.at(end_time).to_s}") if @logger
  # re-adding the same member updates the existing period's 'score' (duration)
  @redis.zadd("#{@key}:unscheduled_maintenances", shortened, um_start)
  @redis.del(marker_key) == 1
end
513
-
514
# Stores a scheduled maintenance period for this check and re-evaluates
# whether the check is currently in scheduled maintenance.
#
# start_time - Integer Unix timestamp
# duration   - positive Integer number of seconds
# opts       - :summary => text stored alongside the period
#
# Raises ArgumentError for a non-Integer start time or a non-positive /
# non-Integer duration.
#
# TODO: consider adding some validation to the data we're adding in here
# eg start_time is a believable unix timestamp (not in the past and not too
# far in the future), duration is within some bounds...
def create_scheduled_maintenance(start_time, duration, opts = {})
  unless start_time.is_a?(Integer)
    raise ArgumentError, 'start time must be provided as a Unix timestamp'
  end
  unless duration.is_a?(Integer) && (duration > 0)
    raise ArgumentError, 'duration in seconds must be provided'
  end

  @redis.zadd("#{@key}:scheduled_maintenances", duration, start_time)
  @redis.set("#{@key}:#{start_time}:scheduled_maintenance:summary", opts[:summary])

  @redis.zadd("#{@key}:sorted_scheduled_maintenance_timestamps", start_time, start_time)

  # the set of scheduled periods changed, so recompute the current one
  update_current_scheduled_maintenance(:revalidate => true)
end
531
-
532
# If the check is not already marked as being in scheduled maintenance, looks
# through its scheduled maintenance periods to see whether one covers the
# current time, and sets the marker key if so.
#
# opts - :revalidate => truthy deletes the existing marker first and always
#        recomputes it (used after the set of periods has changed)
#
# When several periods cover the current time, the one ending furthest in the
# future is used; the marker expires when that period ends.
def update_current_scheduled_maintenance(opts = {})
  if opts[:revalidate]
    @redis.del("#{@key}:scheduled_maintenance")
  elsif in_scheduled_maintenance?
    return
  end

  # are we within a scheduled maintenance period?
  current_time = Time.now.to_i
  current_sched_ms = maintenances(nil, nil, :scheduled => true).select {|sm|
    (sm[:start_time] <= current_time) && (current_time < sm[:end_time])
  }
  return if current_sched_ms.empty?

  # max_by compares the periods by end time; plain max with a one-argument
  # block would treat the block's value as a comparison result
  most_futuristic = current_sched_ms.max_by {|sm| sm[:end_time] }
  start_time = most_futuristic[:start_time]

  duration = most_futuristic[:end_time] - current_time
  if duration > 0
    @redis.setex("#{@key}:scheduled_maintenance", duration.to_i, start_time)
  end
end
558
-
559
# Ends (or cancels) the scheduled maintenance period that starts at start_time.
#
# - a period that has not started yet is removed entirely
# - a period spanning the current time is shortened to end now
# - a period that already finished, or that isn't stored, is left alone
#
# Returns true if a period was removed or shortened, false otherwise.
# Raises ArgumentError for a non-Integer start time.
#
# TODO allow summary to be changed as part of the termination
def end_scheduled_maintenance(start_time)
  unless start_time.is_a?(Integer)
    raise ArgumentError, 'start time must be supplied as a Unix timestamp'
  end

  periods_key = "#{@key}:scheduled_maintenances"

  # nothing to do if no period with that start time is stored
  duration = @redis.zscore(periods_key, start_time)
  return false if duration.nil?

  now = Time.now.to_i

  if start_time > now
    # entirely in the future: forget it
    @redis.del("#{@key}:#{start_time}:scheduled_maintenance:summary")
    @redis.zrem(periods_key, start_time)

    @redis.zremrangebyscore("#{@key}:sorted_scheduled_maintenance_timestamps", start_time, start_time)
  elsif (start_time + duration) > now
    # in progress: truncate it at the current time
    @redis.zadd(periods_key, now - start_time, start_time)
  else
    return false
  end

  # scheduled maintenance periods (may) have changed, revalidate
  update_current_scheduled_maintenance(:revalidate => true)

  true
end
593
-
594
# The check's current state, read from the 'state' field of its Redis hash.
# Returns nil if no state has been stored; callers must treat that as a
# possible state.
def state
  check_hash = "check:#{@key}"
  @redis.hget(check_hash, 'state')
end
599
-
600
# Records an incoming state for this check inside a single Redis MULTI block.
#
# new_state - one of STATE_OK, STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN;
#             anything else is ignored and nil is returned
# options   - :timestamp (defaults to the current time), :summary, :details,
#             :perfdata, :count, :initial_failure_delay, :repeat_failure_delay
#
# On a state change the new state, change time and per-timestamp history are
# stored and the 'failed_checks' set is updated. Regardless of a change, the
# last update time, summary, details, failure delays and (if given) perfdata
# are refreshed.
def update_state(new_state, options = {})
  known_states = [STATE_OK, STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN]
  return unless known_states.include?(new_state)

  timestamp = options[:timestamp] || Time.now.to_i
  summary, details, perfdata, count =
    options.values_at(:summary, :details, :perfdata, :count)
  initial_delay = options[:initial_failure_delay]
  repeat_delay = options[:repeat_failure_delay]

  check_hash = "check:#{@key}"
  state_changed = (state != new_state)

  @redis.multi do |multi|

    if state_changed
      # Note the current state (for speedy lookups)
      multi.hset(check_hash, 'state', new_state)

      # FIXME: rename to last_state_change?
      multi.hset(check_hash, 'last_change', timestamp)

      # FIXME: Iterate through a list of tags associated with an entity:check
      # pair, and update counters
      if [STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN].include?(new_state)
        multi.zadd('failed_checks', timestamp, @key)
      else
        multi.zrem("failed_checks", @key)
      end

      # Retain event data for entity:check pair
      # NB (appending to tail as far as Redis is concerned)
      multi.rpush("#{@key}:states", timestamp)
      multi.set("#{@key}:#{timestamp}:state", new_state)
      multi.set("#{@key}:#{timestamp}:summary", summary) if summary
      multi.set("#{@key}:#{timestamp}:details", details) if details
      multi.set("#{@key}:#{timestamp}:count", count) if count

      multi.zadd("#{@key}:sorted_state_timestamps", timestamp, timestamp)
    end

    # Track when we last saw an event for a particular entity:check pair
    # (used to be last_update=, but needs to happen in the multi block)
    multi.hset(check_hash, 'last_update', timestamp)
    multi.zadd("all_checks", timestamp, @key)
    multi.zadd("all_checks:#{entity.name}", timestamp, check)
    multi.zadd("current_checks:#{entity.name}", timestamp, check)
    multi.zadd('current_entities', timestamp, entity.name)

    # Even if this isn't a state change, the current summary and details
    # may have changed, so always rewrite them
    multi.hset(check_hash, 'summary', (summary || ''))
    multi.hset(check_hash, 'details', (details || ''))

    # NB: delays will revert to defaults if event sources don't continue sending
    # through their custom delays in the event structure
    multi.hset(check_hash, 'initial_failure_delay', (initial_delay || Flapjack::DEFAULT_INITIAL_FAILURE_DELAY))
    multi.hset(check_hash, 'repeat_failure_delay', (repeat_delay || Flapjack::DEFAULT_REPEAT_FAILURE_DELAY))
    if perfdata
      multi.hset(check_hash, 'perfdata', format_perfdata(perfdata).to_json)
    end

  end
end
668
-
669
# The time this check last received an event, as stored in the 'last_update'
# field of its Redis hash.
#
# Returns an Integer, or nil if nothing (or a non-numeric value) is stored.
def last_update
  raw = @redis.hget("check:#{@key}", 'last_update')
  raw.to_i if raw && raw =~ /^\d+$/
end
674
-
675
# Disables a check: it stays in the 'all_checks' sets (with a refreshed
# timestamp) but is removed from the entity's 'current_checks' set. If that
# leaves the entity with no current checks, the entity is removed from
# 'current_entities' too.
def disable!
  now = Time.now.to_i
  @logger.debug("disabling check [#{@key}]") if @logger

  owner_name = entity.name
  current_key = "current_checks:#{owner_name}"

  @redis.zadd("all_checks", now, @key)
  @redis.zadd("all_checks:#{owner_name}", now, check)
  @redis.zrem(current_key, check)

  return unless @redis.zcount(current_key, '-inf', '+inf') == 0
  @redis.zrem("current_entities", entity.name)
end
687
-
688
# Enables a check: stamps it with the current time in the 'all_checks' sets,
# the entity's 'current_checks' set and the 'current_entities' set.
def enable!
  now = Time.now.to_i
  owner_name = entity.name

  @redis.zadd("all_checks", now, @key)
  ["all_checks:#{owner_name}", "current_checks:#{owner_name}"].each do |set_key|
    @redis.zadd(set_key, now, check)
  end
  @redis.zadd('current_entities', now, owner_name)
end
696
-
697
# True if the check is a member of its entity's 'current_checks' sorted set.
def enabled?
  score = @redis.zscore("current_checks:#{entity.name}", check)
  score ? true : false
end
700
-
701
# The time of the check's last state change, as stored in the 'last_change'
# field of its Redis hash.
#
# Returns an Integer, or nil if nothing (or a non-numeric value) is stored.
def last_change
  raw = @redis.hget("check:#{@key}", 'last_change')
  raw.to_i if raw && raw =~ /^\d+$/
end
706
-
707
# Looks up the most recent notification sent for the given state.
#
# state - a member of NOTIFICATION_STATES; for anything else nil is returned
#
# Returns a Hash with :timestamp (Integer) and :summary; both are nil when no
# numeric timestamp is stored for that state.
def last_notification_for_state(state)
  return unless NOTIFICATION_STATES.include?(state)

  stamp = @redis.get("#{@key}:last_#{state.to_s}_notification")
  if stamp && stamp =~ /^\d+$/
    { :timestamp => stamp.to_i,
      :summary   => @redis.get("#{@key}:#{stamp.to_i}:summary") }
  else
    {:timestamp => nil, :summary => nil}
  end
end
714
-
715
# Collects the last notification for every entry of NOTIFICATION_STATES
# except :problem.
#
# Returns a Hash mapping state => the Hash produced by
# last_notification_for_state.
def last_notifications_of_each_type
  NOTIFICATION_STATES.each_with_object({}) do |state, by_state|
    next if state == :problem
    by_state[state] = last_notification_for_state(state)
  end
end
721
-
722
# Works out the most severe state that has been notified since the last
# recovery notification.
#
# Severities are tried in the order critical, warning, unknown; the first one
# with a notification newer than the last recovery wins.
#
# Returns STATE_CRITICAL, STATE_WARNING, STATE_UNKNOWN or nil.
def max_notified_severity_of_current_failure
  last_recovery = last_notification_for_state(:recovery)[:timestamp] || 0

  severities = [[:critical, STATE_CRITICAL],
                [:warning,  STATE_WARNING],
                [:unknown,  STATE_UNKNOWN]]

  # looked up one at a time so lower severities are only queried when needed
  severities.each do |notification_type, severity|
    notified_at = last_notification_for_state(notification_type)[:timestamp]
    return severity if notified_at && (notified_at > last_recovery)
  end

  nil
end
736
-
737
# Finds the most recent notification of any type for this check.
#
# Returns a Hash with :type, :timestamp and :summary; all three are nil when
# no notification with a positive timestamp exists. Results are unpredictable
# if notifications of different types were sent at the same time.
def last_notification
  sent = last_notifications_of_each_type.reject do |_type, notif|
    notif[:timestamp].nil? || notif[:timestamp].to_i <= 0
  end

  # a specific warning/critical notification supersedes a generic problem one
  sent.delete(:problem) if sent.key?(:warning) || sent.key?(:critical)

  return { :type => nil, :timestamp => nil, :summary => nil } if sent.empty?

  type, notif = sent.sort_by {|_type, candidate| candidate[:timestamp] }.last
  { :type => type, :timestamp => notif[:timestamp], :summary => notif[:summary] }
end
751
-
752
# The event count stored for the state recorded at the given timestamp.
#
# Returns an Integer, or nil if nothing (or a non-numeric value) is stored.
def event_count_at(timestamp)
  raw = @redis.get("#{@key}:#{timestamp}:count")
  raw.to_i if raw && raw =~ /^\d+$/
end
757
-
758
# True if the current state is warning, critical or unknown.
def failed?
  failing_states = [STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN]
  failing_states.include?(state)
end
761
-
762
# True if the current state is STATE_OK.
def ok?
  STATE_OK == state
end
765
-
766
# The latest summary text stored in the check's Redis hash.
def summary
  check_hash = "check:#{@key}"
  @redis.hget(check_hash, 'summary')
end
769
-
770
# The latest details text stored in the check's Redis hash.
def details
  check_hash = "check:#{@key}"
  @redis.hget(check_hash, 'details')
end
773
-
774
# The performance data stored in the check's Redis hash, decoded from JSON.
#
# Returns nil if nothing is stored, the decoded value otherwise (a lone Hash
# is wrapped in an Array). If decoding fails, a String describing the
# unparseable value is returned instead of raising.
def perfdata
  raw = @redis.hget("check:#{@key}", 'perfdata')

  decoded = begin
    raw ? JSON.parse(raw) : raw
  rescue
    "Unable to parse string: #{raw}"
  end

  decoded.is_a?(Hash) ? [decoded] : decoded
end
785
-
786
# The initial failure delay stored in the check's Redis hash, as an Integer;
# nil if none is stored.
def initial_failure_delay
  stored = @redis.hget("check:#{@key}", 'initial_failure_delay')
  stored.nil? ? nil : stored.to_i
end
790
-
791
# The repeat failure delay stored in the check's Redis hash, as an Integer;
# nil if none is stored.
def repeat_failure_delay
  stored = @redis.hget("check:#{@key}", 'repeat_failure_delay')
  stored.nil? ? nil : stored.to_i
end
795
-
796
# Returns a list of states for this entity check, sorted by timestamp.
#
# start_time and end_time should be passed as integer timestamps; both bounds
# are inclusive, so e.g. coverage for a day should go from midnight to
# 11:59:59 PM. Pass nil (or any value whose to_i is <= 0) for either end to
# leave that side unbounded.
#
# opts - :order => 'desc' (case-insensitive) returns newest first
#        :limit => positive number caps how many states are returned
#
# Each entry is a Hash with :timestamp, :state, :summary and :details.
def historical_states(start_time, end_time, opts = {})
  lower = (start_time.to_i <= 0) ? '-inf' : start_time.to_s
  upper = (end_time.to_i <= 0) ? '+inf' : end_time.to_s

  order = opts[:order]
  if order && 'desc'.eql?(order.downcase)
    query = :zrevrangebyscore
    bounds = [upper, lower]
  else
    query = :zrangebyscore
    bounds = [lower, upper]
  end

  args = ["#{@key}:sorted_state_timestamps"] + bounds
  args << {:limit => [0, opts[:limit]]} if opts[:limit] && (opts[:limit].to_i > 0)

  state_ts = @redis.send(query, *args)

  state_data = nil

  @redis.multi do |r|
    state_data = state_ts.map do |ts|
      {:timestamp => ts.to_i,
       :state     => r.get("#{@key}:#{ts}:state"),
       :summary   => r.get("#{@key}:#{ts}:summary"),
       :details   => r.get("#{@key}:#{ts}:details"),
      }
    end
  end

  # The redis commands in the block above return future objects, which must
  # be evaluated once the block has run. This relies on a patch in
  # flapjack/patches.rb to make the Future objects report their class.
  state_data.map do |sd|
    sd.keys.each do |field|
      sd[field] = sd[field].value if sd[field].class == Redis::Future
    end
    sd
  end
end
846
-
847
# Finds the state recorded immediately before a known state timestamp (i.e.
# probably one returned via historical_states).
#
# Returns nil if the timestamp isn't in the sorted set or is its first
# member; otherwise a Hash with :timestamp, :state, :summary and :details.
def historical_state_before(timestamp)
  index_key = "#{@key}:sorted_state_timestamps"

  pos = @redis.zrank(index_key, timestamp)
  return if pos.nil? || pos < 1

  neighbours = @redis.zrange(index_key, pos - 1, pos)
  return if neighbours.nil? || neighbours.empty?

  previous = neighbours.first
  {:timestamp => previous.to_i,
   :state     => @redis.get("#{@key}:#{previous}:state"),
   :summary   => @redis.get("#{@key}:#{previous}:summary"),
   :details   => @redis.get("#{@key}:#{previous}:details")}
end
860
-
861
# Returns a list of maintenance periods (either unscheduled or scheduled) for
# this entity check, sorted by start timestamp.
#
# start_time and end_time should be passed as integer timestamps; both bounds
# are inclusive, so e.g. coverage for a day should go from midnight to
# 11:59:59 PM. Pass nil for either end to leave that side unbounded.
#
# opts - :scheduled => truthy selects scheduled periods, otherwise unscheduled
#        :order => 'desc' (case-insensitive) returns newest first
#
# Each entry is a Hash with :start_time, :duration, :summary and :end_time.
def maintenances(start_time, end_time, opts = {})
  kind = opts[:scheduled] ? 'scheduled' : 'unscheduled'

  lower = start_time || '-inf'
  upper = end_time || '+inf'

  order = opts[:order]
  descending = order && 'desc'.eql?(order.downcase)
  query = descending ? :zrevrangebyscore : :zrangebyscore
  maint_ts = @redis.send(query, "#{@key}:sorted_#{kind}_maintenance_timestamps", lower, upper)

  maint_data = nil

  @redis.multi do |r|
    maint_data = maint_ts.map do |ts|
      {:start_time => ts.to_i,
       :duration   => r.zscore("#{@key}:#{kind}_maintenances", ts),
       :summary    => r.get("#{@key}:#{ts}:#{kind}_maintenance:summary"),
      }
    end
  end

  # The redis commands in the block above return future objects, which must
  # be evaluated once the block has run. This relies on a patch in
  # flapjack/patches.rb to make the Future objects report their class.
  maint_data.map do |md|
    md.keys.each do |field|
      md[field] = md[field].value if md[field].class == Redis::Future
    end
    md[:end_time] = (md[:start_time] + md[:duration]).floor
    md
  end
end
897
-
898
# Looks up the contacts interested in this check: the entity's own contacts
# followed by the contacts registered for this specific check (ids that can't
# be resolved to a contact record are dropped).
#
# Returns an Array of contact records.
def contacts
  registration = "#{entity.id}:#{check}"
  contact_ids = @redis.smembers("contacts_for:#{registration}")

  if @logger
    @logger.debug("#{contact_ids.length} contact(s) for #{entity.id}:#{check}: " +
      contact_ids.inspect)
  end

  entity_contacts = entity.contacts
  check_contacts = contact_ids.map do |contact_id|
    Flapjack::Data::Contact.find_by_id(contact_id, :redis => @redis, :logger => @logger)
  end

  entity_contacts + check_contacts.compact
end
912
-
913
# Prefix used for this model's tag keys; overrides the default, which would
# be 'entity_check_tag'.
def tag_prefix
  'check_tag'
end
917
-
918
# Wraps the original tags reader so that the returned tags also include the
# lower-cased words of the entity name (split once on '.') and of the check
# name (split on spaces).
def tags_with_entity_and_check_name
  # call the original reader first; the additions below are applied to @tags
  tags_without_entity_and_check_name

  entity_words = @entity.name.split('.', 2).map(&:downcase)
  check_words = @check.split(' ').map(&:downcase)
  @tags += entity_words + check_words

  @tags
end

# keep the original reader reachable, then route #tags through the wrapper
alias_method :tags_without_entity_and_check_name, :tags
alias_method :tags, :tags_with_entity_and_check_name
930
-
931
# The short hash identifying this check (the key under which it is stored in
# 'checks_by_hash').
#
# The value is memoised; if Redis has none stored yet, the first 8 hex
# characters of the SHA1 digest of the check key are used and stored in both
# lookup hashes ('checks_by_hash' and 'check_hashes_by_id').
def ack_hash
  @ack_hash ||= @redis.hget('check_hashes_by_id', @key)
  return @ack_hash unless @ack_hash.nil?

  @ack_hash = Digest::SHA1.hexdigest(@key)[0..7].downcase
  @redis.multi do |r|
    r.hset("checks_by_hash", @ack_hash, @key)
    r.hset("check_hashes_by_id", @key, @ack_hash)
  end
  @ack_hash
end
943
-
944
# Deletes the stored state history of this check that is older than a given age.
#
# opts - :older_than => age in seconds (required); states recorded at or
#        before (now - older_than) are purged
#
# For every purged state the per-timestamp keys (state, summary, details,
# count, check_latency) are deleted, the timestamps are removed from the
# sorted set, and the leading purged entries are trimmed off the states list.
#
# Returns the number of states purged. Raises RuntimeError if :older_than is
# missing.
def purge_history(opts = {})
  older_than = opts[:older_than] # purge older than this number of seconds ago
  raise ":older_than must be supplied" unless older_than

  cutoff = Time.now.to_i - older_than

  purge_stamps = historical_states(-1, cutoff).map {|s| s[:timestamp]}
  return purge_stamps.length if purge_stamps.empty?

  @logger.info "purging #{purge_stamps.length} states from #{@key}" if @logger
  # 'details' is written per state by update_state, so it is purged as well
  deletees = purge_stamps.flat_map do |timestamp|
    %w(state summary details count check_latency).map do |suffix|
      "#{@key}:#{timestamp}:#{suffix}"
    end
  end
  @logger.info "  deleting a bunch of keys 100 at a time..." if @logger
  deletees.each_slice(100) do |batch|
    @redis.del(batch)
  end

  @logger.info "  removing a range of items from the #{@key}:sorted_state_timestamps sorted set" if @logger
  @redis.zremrangebyscore("#{@key}:sorted_state_timestamps", '-inf', cutoff)

  @logger.info "  getting the #{@key}:states list" if @logger
  states = @redis.lrange("#{@key}:states", 0, -1)
  # index of the first entry newer than the cutoff; when there is none the
  # whole list is trimmed away (compare against the cutoff, not the age)
  index = states.index {|stamp| stamp.to_i > cutoff } || states.length
  @logger.info "  trimming the #{@key}:states from #{index}, length #{states.length}" if @logger
  @redis.ltrim("#{@key}:states", index, -1)

  purge_stamps.length
end
976
-
977
# Filters a list of "entity:check" ids down to the enabled ones, i.e. those
# whose check is a member of the entity's 'current_checks' sorted set.
#
# opts - Hash that must carry the Redis connection under :redis
#
# Returns an Array of ids in the original order. Raises RuntimeError when no
# connection is supplied.
def self.enabled_for(check_ids, opts = {})
  redis = opts[:redis]
  raise "Redis connection not set" unless redis

  check_ids.each_with_object([]) do |check_id, enabled|
    entity_name, check_name = check_id.split(':', 2)
    score = redis.zscore("current_checks:#{entity_name}", check_name)
    enabled << check_id unless score.nil?
  end
end
986
-
987
# Serialises the check through Flapjack.dump_json.
#
# opts - :enabled    => only the literal true marks the check as enabled
#        :entity_ids => ids listed under the "links" entry (default [])
def to_jsonapi(opts = {})
  links = { :entities => opts[:entity_ids] || [] }

  json_data = {
    "id"          => @key,
    "name"        => @check,
    "entity_name" => @entity.name,
    "enabled"     => opts[:enabled].is_a?(TrueClass),
    "tags"        => tags.to_a,
    "links"       => links
  }

  Flapjack.dump_json(json_data)
end
1000
-
1001
- private
1002
-
1003
# Sets up the model for one entity/check pair and registers the check in the
# 'all_checks' sorted sets if it isn't there yet.
#
# entity  - the owning entity; must respond to #name
# check   - the check name
# options - :redis (required), :logger, and :timestamp (used instead of the
#           current time when the check is first registered)
#
# Raises RuntimeError when the connection is missing or when
# Flapjack.sanitize rejects the entity or check.
def initialize(entity, check, options = {})
  @redis = options[:redis]
  raise "Redis connection not set" unless @redis

  @entity = Flapjack.sanitize(entity)
  raise "Invalid entity (#{entity.inspect})" unless @entity

  @check = Flapjack.sanitize(check)
  raise "Invalid check (#{check.inspect} on #{entity.inspect})" unless @check

  @key = "#{entity.name}:#{check}"

  # first sighting of this check: record it globally and under its entity
  if @redis.zscore("all_checks", @key).nil?
    registered_at = options[:timestamp] || Time.now.to_i
    @redis.zadd("all_checks", registered_at, @key)
    @redis.zadd("all_checks:#{entity.name}", registered_at, check)
  end

  @logger = options[:logger]
end
1015
-
1016
# Flattens a Hash of entity name => check names into a flat Array of
# "entity:check" keys.
def self.conflate_to_keys(entity_checks_hash)
  entity_checks_hash.flat_map do |entity, checks|
    checks.map {|check| "#{entity}:#{check}" }
  end
end
1022
-
1023
# Converts a performance data string into an Array of key/value Hashes.
#
# Items are separated by spaces; each item is "label=value;...", and only the
# part of the value before the first ';' is kept. An item without a value
# gets an empty string.
#
#   format_perfdata("time=0.486630s;;;0.000000 size=909B;;;0") =>
#     [ {"key" => "time", "value" => "0.486630s"},
#       {"key" => "size", "value" => "909B"} ]
def format_perfdata(perfdata)
  perfdata.split(' ').map do |item|
    label, measurement = item.split('=')
    value = measurement ? measurement.split(';')[0].to_s : ""
    {"key" => label.to_s, "value" => value}
  end
end
1039
-
1040
- end
1041
-
1042
- end
1043
-
1044
- end