logstash-lib 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (419)
  1. data/.gitignore +24 -0
  2. data/.tailor +8 -0
  3. data/.travis.yml +12 -0
  4. data/CHANGELOG +1185 -0
  5. data/CONTRIBUTING.md +61 -0
  6. data/CONTRIBUTORS +79 -0
  7. data/LICENSE +14 -0
  8. data/Makefile +460 -0
  9. data/README.md +120 -0
  10. data/STYLE.md +96 -0
  11. data/bin/logstash +37 -0
  12. data/bin/logstash-test +4 -0
  13. data/bin/logstash-web +4 -0
  14. data/bin/logstash.lib.sh +78 -0
  15. data/bot/check_pull_changelog.rb +89 -0
  16. data/docs/configuration.md +260 -0
  17. data/docs/docgen.rb +242 -0
  18. data/docs/extending/example-add-a-new-filter.md +121 -0
  19. data/docs/extending/index.md +91 -0
  20. data/docs/flags.md +43 -0
  21. data/docs/generate_index.rb +28 -0
  22. data/docs/index.html.erb +56 -0
  23. data/docs/learn.md +46 -0
  24. data/docs/life-of-an-event.md +109 -0
  25. data/docs/logging-tool-comparisons.md +60 -0
  26. data/docs/plugin-doc.html.erb +91 -0
  27. data/docs/plugin-milestones.md +41 -0
  28. data/docs/plugin-synopsis.html.erb +24 -0
  29. data/docs/release-engineering.md +46 -0
  30. data/docs/release-test-results.md +14 -0
  31. data/docs/repositories.md +35 -0
  32. data/docs/tutorials/10-minute-walkthrough/apache-elasticsearch.conf +35 -0
  33. data/docs/tutorials/10-minute-walkthrough/apache-parse.conf +33 -0
  34. data/docs/tutorials/10-minute-walkthrough/apache_log.1 +1 -0
  35. data/docs/tutorials/10-minute-walkthrough/apache_log.2.bz2 +0 -0
  36. data/docs/tutorials/10-minute-walkthrough/hello-search.conf +25 -0
  37. data/docs/tutorials/10-minute-walkthrough/hello.conf +16 -0
  38. data/docs/tutorials/10-minute-walkthrough/index.md +124 -0
  39. data/docs/tutorials/10-minute-walkthrough/step-5-output.txt +17 -0
  40. data/docs/tutorials/getting-started-centralized-overview-diagram.png +0 -0
  41. data/docs/tutorials/getting-started-centralized-overview-diagram.xml +1 -0
  42. data/docs/tutorials/getting-started-centralized.md +217 -0
  43. data/docs/tutorials/getting-started-simple.md +200 -0
  44. data/docs/tutorials/just-enough-rabbitmq-for-logstash.md +201 -0
  45. data/docs/tutorials/media/frontend-response-codes.png +0 -0
  46. data/docs/tutorials/metrics-from-logs.md +84 -0
  47. data/docs/tutorials/zeromq.md +118 -0
  48. data/extract_services.rb +29 -0
  49. data/gembag.rb +64 -0
  50. data/lib/logstash-event.rb +2 -0
  51. data/lib/logstash.rb +4 -0
  52. data/lib/logstash/JRUBY-6970-openssl.rb +22 -0
  53. data/lib/logstash/JRUBY-6970.rb +102 -0
  54. data/lib/logstash/agent.rb +305 -0
  55. data/lib/logstash/certs/cacert.pem +3895 -0
  56. data/lib/logstash/codecs/base.rb +49 -0
  57. data/lib/logstash/codecs/compress_spooler.rb +50 -0
  58. data/lib/logstash/codecs/dots.rb +18 -0
  59. data/lib/logstash/codecs/edn.rb +28 -0
  60. data/lib/logstash/codecs/edn_lines.rb +36 -0
  61. data/lib/logstash/codecs/fluent.rb +55 -0
  62. data/lib/logstash/codecs/graphite.rb +114 -0
  63. data/lib/logstash/codecs/json.rb +41 -0
  64. data/lib/logstash/codecs/json_lines.rb +52 -0
  65. data/lib/logstash/codecs/json_spooler.rb +22 -0
  66. data/lib/logstash/codecs/line.rb +58 -0
  67. data/lib/logstash/codecs/msgpack.rb +43 -0
  68. data/lib/logstash/codecs/multiline.rb +189 -0
  69. data/lib/logstash/codecs/netflow.rb +342 -0
  70. data/lib/logstash/codecs/netflow/util.rb +212 -0
  71. data/lib/logstash/codecs/noop.rb +19 -0
  72. data/lib/logstash/codecs/oldlogstashjson.rb +56 -0
  73. data/lib/logstash/codecs/plain.rb +48 -0
  74. data/lib/logstash/codecs/rubydebug.rb +22 -0
  75. data/lib/logstash/codecs/spool.rb +38 -0
  76. data/lib/logstash/config/Makefile +4 -0
  77. data/lib/logstash/config/config_ast.rb +380 -0
  78. data/lib/logstash/config/file.rb +39 -0
  79. data/lib/logstash/config/grammar.rb +3504 -0
  80. data/lib/logstash/config/grammar.treetop +241 -0
  81. data/lib/logstash/config/mixin.rb +464 -0
  82. data/lib/logstash/config/registry.rb +13 -0
  83. data/lib/logstash/config/test.conf +18 -0
  84. data/lib/logstash/errors.rb +10 -0
  85. data/lib/logstash/event.rb +262 -0
  86. data/lib/logstash/filters/advisor.rb +178 -0
  87. data/lib/logstash/filters/alter.rb +173 -0
  88. data/lib/logstash/filters/anonymize.rb +93 -0
  89. data/lib/logstash/filters/base.rb +190 -0
  90. data/lib/logstash/filters/checksum.rb +50 -0
  91. data/lib/logstash/filters/cidr.rb +76 -0
  92. data/lib/logstash/filters/cipher.rb +145 -0
  93. data/lib/logstash/filters/clone.rb +35 -0
  94. data/lib/logstash/filters/collate.rb +114 -0
  95. data/lib/logstash/filters/csv.rb +94 -0
  96. data/lib/logstash/filters/date.rb +244 -0
  97. data/lib/logstash/filters/dns.rb +201 -0
  98. data/lib/logstash/filters/drop.rb +32 -0
  99. data/lib/logstash/filters/elapsed.rb +256 -0
  100. data/lib/logstash/filters/elasticsearch.rb +73 -0
  101. data/lib/logstash/filters/environment.rb +27 -0
  102. data/lib/logstash/filters/extractnumbers.rb +84 -0
  103. data/lib/logstash/filters/gelfify.rb +52 -0
  104. data/lib/logstash/filters/geoip.rb +145 -0
  105. data/lib/logstash/filters/grep.rb +153 -0
  106. data/lib/logstash/filters/grok.rb +425 -0
  107. data/lib/logstash/filters/grokdiscovery.rb +75 -0
  108. data/lib/logstash/filters/i18n.rb +51 -0
  109. data/lib/logstash/filters/json.rb +90 -0
  110. data/lib/logstash/filters/json_encode.rb +52 -0
  111. data/lib/logstash/filters/kv.rb +232 -0
  112. data/lib/logstash/filters/metaevent.rb +68 -0
  113. data/lib/logstash/filters/metrics.rb +237 -0
  114. data/lib/logstash/filters/multiline.rb +241 -0
  115. data/lib/logstash/filters/mutate.rb +399 -0
  116. data/lib/logstash/filters/noop.rb +21 -0
  117. data/lib/logstash/filters/prune.rb +149 -0
  118. data/lib/logstash/filters/punct.rb +32 -0
  119. data/lib/logstash/filters/railsparallelrequest.rb +86 -0
  120. data/lib/logstash/filters/range.rb +142 -0
  121. data/lib/logstash/filters/ruby.rb +42 -0
  122. data/lib/logstash/filters/sleep.rb +111 -0
  123. data/lib/logstash/filters/split.rb +64 -0
  124. data/lib/logstash/filters/sumnumbers.rb +73 -0
  125. data/lib/logstash/filters/syslog_pri.rb +107 -0
  126. data/lib/logstash/filters/translate.rb +121 -0
  127. data/lib/logstash/filters/unique.rb +29 -0
  128. data/lib/logstash/filters/urldecode.rb +57 -0
  129. data/lib/logstash/filters/useragent.rb +112 -0
  130. data/lib/logstash/filters/uuid.rb +58 -0
  131. data/lib/logstash/filters/xml.rb +139 -0
  132. data/lib/logstash/filters/zeromq.rb +123 -0
  133. data/lib/logstash/filterworker.rb +122 -0
  134. data/lib/logstash/inputs/base.rb +125 -0
  135. data/lib/logstash/inputs/collectd.rb +306 -0
  136. data/lib/logstash/inputs/drupal_dblog.rb +323 -0
  137. data/lib/logstash/inputs/drupal_dblog/jdbcconnection.rb +66 -0
  138. data/lib/logstash/inputs/elasticsearch.rb +140 -0
  139. data/lib/logstash/inputs/eventlog.rb +129 -0
  140. data/lib/logstash/inputs/eventlog/racob_fix.rb +44 -0
  141. data/lib/logstash/inputs/exec.rb +69 -0
  142. data/lib/logstash/inputs/file.rb +146 -0
  143. data/lib/logstash/inputs/ganglia.rb +127 -0
  144. data/lib/logstash/inputs/ganglia/gmondpacket.rb +146 -0
  145. data/lib/logstash/inputs/ganglia/xdr.rb +327 -0
  146. data/lib/logstash/inputs/gelf.rb +138 -0
  147. data/lib/logstash/inputs/gemfire.rb +222 -0
  148. data/lib/logstash/inputs/generator.rb +97 -0
  149. data/lib/logstash/inputs/graphite.rb +41 -0
  150. data/lib/logstash/inputs/heroku.rb +51 -0
  151. data/lib/logstash/inputs/imap.rb +136 -0
  152. data/lib/logstash/inputs/irc.rb +84 -0
  153. data/lib/logstash/inputs/log4j.rb +136 -0
  154. data/lib/logstash/inputs/lumberjack.rb +53 -0
  155. data/lib/logstash/inputs/pipe.rb +57 -0
  156. data/lib/logstash/inputs/rabbitmq.rb +126 -0
  157. data/lib/logstash/inputs/rabbitmq/bunny.rb +118 -0
  158. data/lib/logstash/inputs/rabbitmq/hot_bunnies.rb +1 -0
  159. data/lib/logstash/inputs/rabbitmq/march_hare.rb +129 -0
  160. data/lib/logstash/inputs/redis.rb +263 -0
  161. data/lib/logstash/inputs/relp.rb +106 -0
  162. data/lib/logstash/inputs/s3.rb +279 -0
  163. data/lib/logstash/inputs/snmptrap.rb +87 -0
  164. data/lib/logstash/inputs/sqlite.rb +185 -0
  165. data/lib/logstash/inputs/sqs.rb +172 -0
  166. data/lib/logstash/inputs/stdin.rb +46 -0
  167. data/lib/logstash/inputs/stomp.rb +84 -0
  168. data/lib/logstash/inputs/syslog.rb +237 -0
  169. data/lib/logstash/inputs/tcp.rb +231 -0
  170. data/lib/logstash/inputs/threadable.rb +18 -0
  171. data/lib/logstash/inputs/twitter.rb +82 -0
  172. data/lib/logstash/inputs/udp.rb +81 -0
  173. data/lib/logstash/inputs/unix.rb +163 -0
  174. data/lib/logstash/inputs/varnishlog.rb +48 -0
  175. data/lib/logstash/inputs/websocket.rb +50 -0
  176. data/lib/logstash/inputs/wmi.rb +72 -0
  177. data/lib/logstash/inputs/xmpp.rb +81 -0
  178. data/lib/logstash/inputs/zenoss.rb +143 -0
  179. data/lib/logstash/inputs/zeromq.rb +165 -0
  180. data/lib/logstash/kibana.rb +113 -0
  181. data/lib/logstash/loadlibs.rb +9 -0
  182. data/lib/logstash/logging.rb +89 -0
  183. data/lib/logstash/monkeypatches-for-bugs.rb +2 -0
  184. data/lib/logstash/monkeypatches-for-debugging.rb +47 -0
  185. data/lib/logstash/monkeypatches-for-performance.rb +66 -0
  186. data/lib/logstash/multiqueue.rb +53 -0
  187. data/lib/logstash/namespace.rb +16 -0
  188. data/lib/logstash/outputs/base.rb +120 -0
  189. data/lib/logstash/outputs/boundary.rb +116 -0
  190. data/lib/logstash/outputs/circonus.rb +78 -0
  191. data/lib/logstash/outputs/cloudwatch.rb +351 -0
  192. data/lib/logstash/outputs/csv.rb +55 -0
  193. data/lib/logstash/outputs/datadog.rb +93 -0
  194. data/lib/logstash/outputs/datadog_metrics.rb +123 -0
  195. data/lib/logstash/outputs/elasticsearch.rb +332 -0
  196. data/lib/logstash/outputs/elasticsearch/elasticsearch-template.json +44 -0
  197. data/lib/logstash/outputs/elasticsearch_http.rb +256 -0
  198. data/lib/logstash/outputs/elasticsearch_river.rb +214 -0
  199. data/lib/logstash/outputs/email.rb +299 -0
  200. data/lib/logstash/outputs/exec.rb +40 -0
  201. data/lib/logstash/outputs/file.rb +180 -0
  202. data/lib/logstash/outputs/ganglia.rb +75 -0
  203. data/lib/logstash/outputs/gelf.rb +208 -0
  204. data/lib/logstash/outputs/gemfire.rb +103 -0
  205. data/lib/logstash/outputs/google_bigquery.rb +570 -0
  206. data/lib/logstash/outputs/google_cloud_storage.rb +431 -0
  207. data/lib/logstash/outputs/graphite.rb +143 -0
  208. data/lib/logstash/outputs/graphtastic.rb +185 -0
  209. data/lib/logstash/outputs/hipchat.rb +80 -0
  210. data/lib/logstash/outputs/http.rb +142 -0
  211. data/lib/logstash/outputs/irc.rb +80 -0
  212. data/lib/logstash/outputs/jira.rb +109 -0
  213. data/lib/logstash/outputs/juggernaut.rb +105 -0
  214. data/lib/logstash/outputs/librato.rb +146 -0
  215. data/lib/logstash/outputs/loggly.rb +93 -0
  216. data/lib/logstash/outputs/lumberjack.rb +51 -0
  217. data/lib/logstash/outputs/metriccatcher.rb +103 -0
  218. data/lib/logstash/outputs/mongodb.rb +81 -0
  219. data/lib/logstash/outputs/nagios.rb +119 -0
  220. data/lib/logstash/outputs/nagios_nsca.rb +123 -0
  221. data/lib/logstash/outputs/null.rb +18 -0
  222. data/lib/logstash/outputs/opentsdb.rb +101 -0
  223. data/lib/logstash/outputs/pagerduty.rb +79 -0
  224. data/lib/logstash/outputs/pipe.rb +132 -0
  225. data/lib/logstash/outputs/rabbitmq.rb +96 -0
  226. data/lib/logstash/outputs/rabbitmq/bunny.rb +135 -0
  227. data/lib/logstash/outputs/rabbitmq/hot_bunnies.rb +1 -0
  228. data/lib/logstash/outputs/rabbitmq/march_hare.rb +143 -0
  229. data/lib/logstash/outputs/redis.rb +245 -0
  230. data/lib/logstash/outputs/riak.rb +152 -0
  231. data/lib/logstash/outputs/riemann.rb +109 -0
  232. data/lib/logstash/outputs/s3.rb +356 -0
  233. data/lib/logstash/outputs/sns.rb +124 -0
  234. data/lib/logstash/outputs/solr_http.rb +78 -0
  235. data/lib/logstash/outputs/sqs.rb +141 -0
  236. data/lib/logstash/outputs/statsd.rb +116 -0
  237. data/lib/logstash/outputs/stdout.rb +53 -0
  238. data/lib/logstash/outputs/stomp.rb +67 -0
  239. data/lib/logstash/outputs/syslog.rb +145 -0
  240. data/lib/logstash/outputs/tcp.rb +145 -0
  241. data/lib/logstash/outputs/udp.rb +38 -0
  242. data/lib/logstash/outputs/websocket.rb +46 -0
  243. data/lib/logstash/outputs/websocket/app.rb +29 -0
  244. data/lib/logstash/outputs/websocket/pubsub.rb +45 -0
  245. data/lib/logstash/outputs/xmpp.rb +78 -0
  246. data/lib/logstash/outputs/zabbix.rb +108 -0
  247. data/lib/logstash/outputs/zeromq.rb +125 -0
  248. data/lib/logstash/pipeline.rb +286 -0
  249. data/lib/logstash/plugin.rb +150 -0
  250. data/lib/logstash/plugin_mixins/aws_config.rb +93 -0
  251. data/lib/logstash/program.rb +15 -0
  252. data/lib/logstash/runner.rb +238 -0
  253. data/lib/logstash/sized_queue.rb +8 -0
  254. data/lib/logstash/test.rb +183 -0
  255. data/lib/logstash/threadwatchdog.rb +37 -0
  256. data/lib/logstash/time_addon.rb +33 -0
  257. data/lib/logstash/util.rb +106 -0
  258. data/lib/logstash/util/buftok.rb +139 -0
  259. data/lib/logstash/util/charset.rb +39 -0
  260. data/lib/logstash/util/fieldreference.rb +50 -0
  261. data/lib/logstash/util/password.rb +25 -0
  262. data/lib/logstash/util/prctl.rb +11 -0
  263. data/lib/logstash/util/relp.rb +326 -0
  264. data/lib/logstash/util/require-helper.rb +18 -0
  265. data/lib/logstash/util/socket_peer.rb +7 -0
  266. data/lib/logstash/util/zenoss.rb +566 -0
  267. data/lib/logstash/util/zeromq.rb +47 -0
  268. data/lib/logstash/version.rb +6 -0
  269. data/locales/en.yml +170 -0
  270. data/logstash-event.gemspec +29 -0
  271. data/logstash.gemspec +128 -0
  272. data/patterns/firewalls +60 -0
  273. data/patterns/grok-patterns +91 -0
  274. data/patterns/haproxy +37 -0
  275. data/patterns/java +3 -0
  276. data/patterns/linux-syslog +14 -0
  277. data/patterns/mcollective +1 -0
  278. data/patterns/mcollective-patterns +4 -0
  279. data/patterns/nagios +108 -0
  280. data/patterns/postgresql +3 -0
  281. data/patterns/redis +3 -0
  282. data/patterns/ruby +2 -0
  283. data/pkg/build.sh +135 -0
  284. data/pkg/centos/after-install.sh +1 -0
  285. data/pkg/centos/before-install.sh +10 -0
  286. data/pkg/centos/before-remove.sh +11 -0
  287. data/pkg/centos/sysconfig +15 -0
  288. data/pkg/debian/after-install.sh +5 -0
  289. data/pkg/debian/before-install.sh +13 -0
  290. data/pkg/debian/before-remove.sh +13 -0
  291. data/pkg/debian/build.sh +34 -0
  292. data/pkg/debian/debian/README +6 -0
  293. data/pkg/debian/debian/changelog +17 -0
  294. data/pkg/debian/debian/compat +1 -0
  295. data/pkg/debian/debian/control +16 -0
  296. data/pkg/debian/debian/copyright +27 -0
  297. data/pkg/debian/debian/dirs +19 -0
  298. data/pkg/debian/debian/docs +0 -0
  299. data/pkg/debian/debian/logstash.default +39 -0
  300. data/pkg/debian/debian/logstash.init +201 -0
  301. data/pkg/debian/debian/logstash.install +1 -0
  302. data/pkg/debian/debian/logstash.logrotate +9 -0
  303. data/pkg/debian/debian/logstash.postinst +68 -0
  304. data/pkg/debian/debian/logstash.postrm +23 -0
  305. data/pkg/debian/debian/manpage.1.ex +59 -0
  306. data/pkg/debian/debian/preinst.ex +37 -0
  307. data/pkg/debian/debian/prerm.ex +40 -0
  308. data/pkg/debian/debian/release.conf +5 -0
  309. data/pkg/debian/debian/rules +80 -0
  310. data/pkg/debian/debian/watch.ex +22 -0
  311. data/pkg/logrotate.conf +8 -0
  312. data/pkg/logstash-web.default +41 -0
  313. data/pkg/logstash-web.sysv.debian +201 -0
  314. data/pkg/logstash-web.upstart.ubuntu +18 -0
  315. data/pkg/logstash.default +45 -0
  316. data/pkg/logstash.sysv.debian +202 -0
  317. data/pkg/logstash.sysv.redhat +158 -0
  318. data/pkg/logstash.upstart.ubuntu +20 -0
  319. data/pkg/rpm/SOURCES/logstash.conf +26 -0
  320. data/pkg/rpm/SOURCES/logstash.init +80 -0
  321. data/pkg/rpm/SOURCES/logstash.logrotate +8 -0
  322. data/pkg/rpm/SOURCES/logstash.sysconfig +3 -0
  323. data/pkg/rpm/SOURCES/logstash.wrapper +105 -0
  324. data/pkg/rpm/SPECS/logstash.spec +180 -0
  325. data/pkg/rpm/readme.md +4 -0
  326. data/pkg/ubuntu/after-install.sh +7 -0
  327. data/pkg/ubuntu/before-install.sh +12 -0
  328. data/pkg/ubuntu/before-remove.sh +13 -0
  329. data/pull_release_note.rb +25 -0
  330. data/require-analyze.rb +22 -0
  331. data/spec/README.md +14 -0
  332. data/spec/codecs/edn.rb +40 -0
  333. data/spec/codecs/edn_lines.rb +53 -0
  334. data/spec/codecs/graphite.rb +96 -0
  335. data/spec/codecs/json.rb +57 -0
  336. data/spec/codecs/json_lines.rb +51 -0
  337. data/spec/codecs/json_spooler.rb +43 -0
  338. data/spec/codecs/msgpack.rb +39 -0
  339. data/spec/codecs/multiline.rb +60 -0
  340. data/spec/codecs/oldlogstashjson.rb +55 -0
  341. data/spec/codecs/plain.rb +35 -0
  342. data/spec/codecs/spool.rb +35 -0
  343. data/spec/conditionals/test.rb +323 -0
  344. data/spec/config.rb +31 -0
  345. data/spec/event.rb +165 -0
  346. data/spec/examples/fail2ban.rb +28 -0
  347. data/spec/examples/graphite-input.rb +41 -0
  348. data/spec/examples/mysql-slow-query.rb +70 -0
  349. data/spec/examples/parse-apache-logs.rb +66 -0
  350. data/spec/examples/parse-haproxy-logs.rb +115 -0
  351. data/spec/examples/syslog.rb +48 -0
  352. data/spec/filters/alter.rb +96 -0
  353. data/spec/filters/anonymize.rb +189 -0
  354. data/spec/filters/checksum.rb +41 -0
  355. data/spec/filters/clone.rb +67 -0
  356. data/spec/filters/collate.rb +122 -0
  357. data/spec/filters/csv.rb +174 -0
  358. data/spec/filters/date.rb +285 -0
  359. data/spec/filters/date_performance.rb +31 -0
  360. data/spec/filters/dns.rb +159 -0
  361. data/spec/filters/drop.rb +19 -0
  362. data/spec/filters/elapsed.rb +294 -0
  363. data/spec/filters/environment.rb +43 -0
  364. data/spec/filters/geoip.rb +62 -0
  365. data/spec/filters/grep.rb +342 -0
  366. data/spec/filters/grok.rb +473 -0
  367. data/spec/filters/grok/timeout2.rb +56 -0
  368. data/spec/filters/grok/timeouts.rb +39 -0
  369. data/spec/filters/i18n.rb +25 -0
  370. data/spec/filters/json.rb +72 -0
  371. data/spec/filters/json_encode.rb +37 -0
  372. data/spec/filters/kv.rb +403 -0
  373. data/spec/filters/metrics.rb +212 -0
  374. data/spec/filters/multiline.rb +119 -0
  375. data/spec/filters/mutate.rb +180 -0
  376. data/spec/filters/noop.rb +221 -0
  377. data/spec/filters/prune.rb +441 -0
  378. data/spec/filters/punct.rb +18 -0
  379. data/spec/filters/railsparallelrequest.rb +112 -0
  380. data/spec/filters/range.rb +169 -0
  381. data/spec/filters/split.rb +58 -0
  382. data/spec/filters/translate.rb +70 -0
  383. data/spec/filters/unique.rb +25 -0
  384. data/spec/filters/useragent.rb +42 -0
  385. data/spec/filters/xml.rb +157 -0
  386. data/spec/inputs/file.rb +107 -0
  387. data/spec/inputs/gelf.rb +52 -0
  388. data/spec/inputs/generator.rb +30 -0
  389. data/spec/inputs/imap.rb +60 -0
  390. data/spec/inputs/redis.rb +63 -0
  391. data/spec/inputs/relp.rb +70 -0
  392. data/spec/inputs/tcp.rb +101 -0
  393. data/spec/jar.rb +21 -0
  394. data/spec/outputs/csv.rb +266 -0
  395. data/spec/outputs/elasticsearch.rb +161 -0
  396. data/spec/outputs/elasticsearch_http.rb +240 -0
  397. data/spec/outputs/email.rb +173 -0
  398. data/spec/outputs/file.rb +82 -0
  399. data/spec/outputs/graphite.rb +236 -0
  400. data/spec/outputs/redis.rb +127 -0
  401. data/spec/speed.rb +20 -0
  402. data/spec/sqlite-test.rb +81 -0
  403. data/spec/support/LOGSTASH-733.rb +21 -0
  404. data/spec/support/LOGSTASH-820.rb +25 -0
  405. data/spec/support/akamai-grok.rb +26 -0
  406. data/spec/support/date-http.rb +17 -0
  407. data/spec/support/postwait1.rb +26 -0
  408. data/spec/support/pull375.rb +21 -0
  409. data/spec/test_utils.rb +125 -0
  410. data/spec/util/fieldeval_spec.rb +44 -0
  411. data/test/jenkins/config.xml.erb +74 -0
  412. data/test/jenkins/create-jobs.rb +23 -0
  413. data/test/jenkins/generatorjob.config.xml +66 -0
  414. data/tools/Gemfile +14 -0
  415. data/tools/Gemfile.jruby-1.9.lock +322 -0
  416. data/tools/Gemfile.rbx-2.1.lock +516 -0
  417. data/tools/Gemfile.ruby-1.9.1.lock +310 -0
  418. data/tools/Gemfile.ruby-2.0.0.lock +310 -0
  419. metadata +629 -0
@@ -0,0 +1,75 @@
1
+ # encoding: utf-8
2
+ require "logstash/outputs/base"
3
+ require "logstash/namespace"
4
+
5
+ # This output allows you to pull metrics from your logs and ship them to
6
+ # ganglia's gmond. This is heavily based on the graphite output.
7
+ class LogStash::Outputs::Ganglia < LogStash::Outputs::Base
8
+ config_name "ganglia"
9
+ milestone 2
10
+
11
+ # The address of the ganglia server.
12
+ config :host, :validate => :string, :default => "localhost"
13
+
14
+ # The port to connect on your ganglia server.
15
+ config :port, :validate => :number, :default => 8649
16
+
17
+ # The metric to use. This supports dynamic strings like `%{host}`
18
+ config :metric, :validate => :string, :required => true
19
+
20
+ # The value to use. This supports dynamic strings like `%{bytes}`
21
+ # It will be coerced to a floating point value. Values which cannot be
22
+ # coerced will zero (0)
23
+ config :value, :validate => :string, :required => true
24
+
25
+ # The type of value for this metric.
26
+ config :metric_type, :validate => %w{string int8 uint8 int16 uint16 int32 uint32 float double},
27
+ :default => "uint8"
28
+
29
+ # Gmetric units for metric, such as "kb/sec" or "ms" or whatever unit
30
+ # this metric uses.
31
+ config :units, :validate => :string, :default => ""
32
+
33
+ # Maximum time in seconds between gmetric calls for this metric.
34
+ config :max_interval, :validate => :number, :default => 60
35
+
36
+ # Lifetime in seconds of this metric
37
+ config :lifetime, :validate => :number, :default => 300
38
+
39
+ # Metric group
40
+ config :group, :validate => :string, :default => ""
41
+
42
+ # Metric slope, represents metric behavior
43
+ config :slope, :validate => %w{zero positive negative both unspecified}, :default => "both"
44
+
45
+ def register
46
+ require "gmetric"
47
+ end # def register
48
+
49
+ public
50
+ def receive(event)
51
+ return unless output?(event)
52
+
53
+ # gmetric only takes integer values, so convert it to int.
54
+ case @metric_type
55
+ when "string"
56
+ localvalue = event.sprintf(@value)
57
+ when "float"
58
+ localvalue = event.sprintf(@value).to_f
59
+ when "double"
60
+ localvalue = event.sprintf(@value).to_f
61
+ else # int8|uint8|int16|uint16|int32|uint32
62
+ localvalue = event.sprintf(@value).to_i
63
+ end
64
+ Ganglia::GMetric.send(@host, @port, {
65
+ :name => event.sprintf(@metric),
66
+ :units => @units,
67
+ :type => @metric_type,
68
+ :value => localvalue,
69
+ :group => @group,
70
+ :slope => @slope,
71
+ :tmax => @max_interval,
72
+ :dmax => @lifetime
73
+ })
74
+ end # def receive
75
+ end # class LogStash::Outputs::Ganglia
@@ -0,0 +1,208 @@
1
+ # encoding: utf-8
2
+ require "logstash/namespace"
3
+ require "logstash/outputs/base"
4
+
5
+ # GELF output. This is most useful if you want to use logstash
6
+ # to output events to graylog2.
7
+ #
8
+ # More information at <http://graylog2.org/gelf#specs>
9
+ class LogStash::Outputs::Gelf < LogStash::Outputs::Base
10
+
11
+ config_name "gelf"
12
+ milestone 2
13
+
14
+ # graylog2 server address
15
+ config :host, :validate => :string, :required => true
16
+
17
+ # graylog2 server port
18
+ config :port, :validate => :number, :default => 12201
19
+
20
+ # The GELF chunksize. You usually don't need to change this.
21
+ config :chunksize, :validate => :number, :default => 1420
22
+
23
+ # Allow overriding of the gelf 'sender' field. This is useful if you
24
+ # want to use something other than the event's source host as the
25
+ # "sender" of an event. A common case for this is using the application name
26
+ # instead of the hostname.
27
+ config :sender, :validate => :string, :default => "%{host}"
28
+
29
+ # The GELF message level. Dynamic values like %{level} are permitted here;
30
+ # useful if you want to parse the 'log level' from an event and use that
31
+ # as the gelf level/severity.
32
+ #
33
+ # Values here can be integers [0..7] inclusive or any of
34
+ # "debug", "info", "warn", "error", "fatal" (case insensitive).
35
+ # Single-character versions of these are also valid, "d", "i", "w", "e", "f",
36
+ # "u"
37
+ # The following additional severity_labels from logstash's syslog_pri filter
38
+ # are accepted: "emergency", "alert", "critical", "warning", "notice", and
39
+ # "informational"
40
+ config :level, :validate => :array, :default => [ "%{severity}", "INFO" ]
41
+
42
+ # The GELF facility. Dynamic values like %{foo} are permitted here; this
43
+ # is useful if you need to use a value from the event as the facility name.
44
+ config :facility, :validate => :string, :deprecated => true
45
+
46
+ # The GELF line number; this is usually the line number in your program where
47
+ # the log event originated. Dynamic values like %{foo} are permitted here, but the
48
+ # value should be a number.
49
+ config :line, :validate => :string, :deprecated => true
50
+
51
+ # The GELF file; this is usually the source code file in your program where
52
+ # the log event originated. Dynamic values like %{foo} are permitted here.
53
+ config :file, :validate => :string, :deprecated => true
54
+
55
+ # Ship metadata within event object? This will cause logstash to ship
56
+ # any fields in the event (such as those created by grok) in the GELF
57
+ # messages.
58
+ config :ship_metadata, :validate => :boolean, :default => true
59
+
60
+ # Ship tags within events. This will cause logstash to ship the tags of an
61
+ # event as the field _tags.
62
+ config :ship_tags, :validate => :boolean, :default => true
63
+
64
+ # Ignore these fields when ship_metadata is set. Typically this lists the
65
+ # fields used in dynamic values for GELF fields.
66
+ config :ignore_metadata, :validate => :array, :default => [ "@timestamp", "@version", "severity", "host", "source_host", "source_path", "short_message" ]
67
+
68
+ # The GELF custom field mappings. GELF supports arbitrary attributes as custom
69
+ # fields. This exposes that. Exclude the `_` portion of the field name
70
+ # e.g. `custom_fields => ['foo_field', 'some_value']
71
+ # sets `_foo_field` = `some_value`
72
+ config :custom_fields, :validate => :hash, :default => {}
73
+
74
+ # The GELF full message. Dynamic values like %{foo} are permitted here.
75
+ config :full_message, :validate => :string, :default => "%{message}"
76
+
77
+ # The GELF short message field name. If the field does not exist or is empty,
78
+ # the event message is taken instead.
79
+ config :short_message, :validate => :string, :default => "short_message"
80
+
81
+ public
82
+ def register
83
+ require "gelf" # rubygem 'gelf'
84
+ option_hash = Hash.new
85
+
86
+ #@gelf = GELF::Notifier.new(@host, @port, @chunksize, option_hash)
87
+ @gelf = GELF::Notifier.new(@host, @port, @chunksize)
88
+
89
+ # This sets the 'log level' of gelf; since we're forwarding messages, we'll
90
+ # want to forward *all* messages, so set level to 0 so all messages get
91
+ # shipped
92
+ @gelf.level = 0
93
+
94
+ # Since we use gelf-rb which assumes the severity level integer
95
+ # is coming from a ruby logging subsystem, we need to instruct it
96
+ # that the levels we provide should be mapped directly since they're
97
+ # already RFC 5424 compliant
98
+ # this requires gelf-rb commit bb1f4a9 which added the level_mapping def
99
+ level_mapping = Hash.new
100
+ (0..7).step(1) { |l| level_mapping[l]=l }
101
+ @gelf.level_mapping = level_mapping
102
+
103
+ # If we leave that set, the gelf gem will extract the file and line number
104
+ # of the source file that logged the message (i.e. logstash/gelf.rb:138).
105
+ # With that set to false, it can use the actual event's filename (i.e.
106
+ # /var/log/syslog), which is much more useful
107
+ @gelf.collect_file_and_line = false
108
+
109
+ # these are syslog words and abbreviations mapped to RFC 5424 integers
110
+ # and logstash's syslog_pri filter
111
+ @level_map = {
112
+ "debug" => 7, "d" => 7,
113
+ "info" => 6, "i" => 6, "informational" => 6,
114
+ "notice" => 5, "n" => 5,
115
+ "warn" => 4, "w" => 4, "warning" => 4,
116
+ "error" => 3, "e" => 3,
117
+ "critical" => 2, "c" => 2,
118
+ "alert" => 1, "a" => 1,
119
+ "emergency" => 0, "e" => 0,
120
+ }
121
+ end # def register
122
+
123
+ public
124
+ def receive(event)
125
+ return unless output?(event)
126
+
127
+ # We have to make our own hash here because GELF expects a hash
128
+ # with a specific format.
129
+ m = Hash.new
130
+
131
+ m["short_message"] = event["message"]
132
+ if event[@short_message]
133
+ v = event[@short_message]
134
+ short_message = (v.is_a?(Array) && v.length == 1) ? v.first : v
135
+ short_message = short_message.to_s
136
+ if !short_message.empty?
137
+ m["short_message"] = short_message
138
+ end
139
+ end
140
+
141
+ m["full_message"] = event.sprintf(@full_message)
142
+
143
+ m["host"] = event.sprintf(@sender)
144
+
145
+ # deprecated fields
146
+ m["facility"] = event.sprintf(@facility) if @facility
147
+ m["file"] = event.sprintf(@file) if @file
148
+ m["line"] = event.sprintf(@line) if @line
149
+ m["line"] = m["line"].to_i if m["line"].is_a?(String) and m["line"] === /^[\d]+$/
150
+
151
+ if @ship_metadata
152
+ event.to_hash.each do |name, value|
153
+ next if value == nil
154
+ next if name == "message"
155
+
156
+ # Trim leading '_' in the event
157
+ name = name[1..-1] if name.start_with?('_')
158
+ name = "_id" if name == "id" # "_id" is reserved, so use "__id"
159
+ if !value.nil? and !@ignore_metadata.include?(name)
160
+ if value.is_a?(Array)
161
+ m["_#{name}"] = value.join(', ')
162
+ elsif value.is_a?(Hash)
163
+ value.each do |hash_name, hash_value|
164
+ m["_#{name}_#{hash_name}"] = hash_value
165
+ end
166
+ else
167
+ # Non array values should be presented as-is
168
+ # https://logstash.jira.com/browse/LOGSTASH-113
169
+ m["_#{name}"] = value
170
+ end
171
+ end
172
+ end
173
+ end
174
+
175
+ if @ship_tags
176
+ m["_tags"] = event["tags"].join(', ') if event["tags"]
177
+ end
178
+
179
+ if @custom_fields
180
+ @custom_fields.each do |field_name, field_value|
181
+ m["_#{field_name}"] = field_value unless field_name == 'id'
182
+ end
183
+ end
184
+
185
+ # Probe severity array levels
186
+ level = nil
187
+ if @level.is_a?(Array)
188
+ @level.each do |value|
189
+ parsed_value = event.sprintf(value)
190
+ next if value.count('%{') > 0 and parsed_value == value
191
+
192
+ level = parsed_value
193
+ break
194
+ end
195
+ else
196
+ level = event.sprintf(@level.to_s)
197
+ end
198
+ m["level"] = (@level_map[level.downcase] || level).to_i
199
+
200
+ @logger.debug(["Sending GELF event", m])
201
+ begin
202
+ @gelf.notify!(m, :timestamp => event["@timestamp"].to_f)
203
+ rescue
204
+ @logger.warn("Trouble sending GELF event", :gelf_event => m,
205
+ :event => event, :error => $!)
206
+ end
207
+ end # def receive
208
+ end # class LogStash::Outputs::Gelf
@@ -0,0 +1,103 @@
1
+ # encoding: utf-8
2
+ require "logstash/outputs/base"
3
+ require "logstash/namespace"
4
+
5
+ # Push events to a GemFire region.
6
+ #
7
+ # GemFire is an object database.
8
+ #
9
+ # To use this plugin you need to add gemfire.jar to your CLASSPATH;
10
+ # using format=json requires jackson.jar too.
11
+ #
12
+ # Note: this plugin has only been tested with GemFire 7.0.
13
+ #
14
class LogStash::Outputs::Gemfire < LogStash::Outputs::Base

  config_name "gemfire"
  milestone 1

  # Your client cache name
  config :cache_name, :validate => :string, :default => "logstash"

  # The path to a GemFire client cache XML file.
  #
  # Example:
  #
  #      <client-cache>
  #        <pool name="client-pool">
  #          <locator host="localhost" port="31331"/>
  #        </pool>
  #        <region name="Logstash">
  #          <region-attributes refid="CACHING_PROXY" pool-name="client-pool" >
  #          </region-attributes>
  #        </region>
  #      </client-cache>
  #
  config :cache_xml_file, :validate => :string, :default => nil

  # The region name
  config :region_name, :validate => :string, :default => "Logstash"

  # A sprintf format to use when building keys
  config :key_format, :validate => :string, :default => "%{host}-%{@timestamp}"

  public
  # Imports the GemFire client classes (JRuby-style import; gemfire.jar must
  # already be on the CLASSPATH) and opens the client cache connection.
  def register
    import com.gemstone.gemfire.cache.client.ClientCacheFactory
    import com.gemstone.gemfire.pdx.JSONFormatter

    @logger.info("Registering output", :plugin => self)
    connect
  end # def register

  public
  # Builds the client cache from cache_xml_file and looks up the target
  # region. On any error it retries forever with a 1s backoff, unless the
  # plugin is shutting down (terminating? is provided by the output base).
  def connect
    begin
      @logger.debug("Connecting to GemFire #{@cache_name}")

      @cache = ClientCacheFactory.new.
               set("name", @cache_name).
               set("cache-xml-file", @cache_xml_file).create
      @logger.debug("Created cache #{@cache.inspect}")

    rescue => e
      if terminating?
        return
      else
        @logger.error("Gemfire connection error (during connect), will reconnect",
                      :exception => e, :backtrace => e.backtrace)
        sleep(1)
        retry
      end
    end

    @region = @cache.getRegion(@region_name);
    @logger.debug("Created region #{@region.inspect}")
  end # def connect

  public
  # Serializes the event to JSON, converts it to a GemFire PDX instance via
  # JSONFormatter, and puts it into the region under a key built from
  # key_format.
  def receive(event)
    return unless output?(event)

    @logger.debug("Sending event", :destination => to_s, :event => event)

    key = event.sprintf @key_format

    message = JSONFormatter.fromJSON(event.to_json)

    @logger.debug("Publishing message", { :destination => to_s, :message => message, :key => key })
    @region.put(key, message)
  end # def receive

  public
  def to_s
    # NOTE(review): bare `cache_name` (no @) assumes the config mixin defines
    # an accessor for config options — confirm; the rest of this class reads
    # @-ivars directly.
    return "gemfire://#{cache_name}"
  end

  public
  # Closes the client cache and marks this output as finished.
  def teardown
    @cache.close if @cache
    @cache = nil
    finished
  end # def teardown
end # class LogStash::Outputs::Gemfire
@@ -0,0 +1,570 @@
1
+ # Author: Rodrigo De Castro <rdc@google.com>
2
+ # Date: 2013-09-20
3
+ #
4
+ # Copyright 2013 Google Inc.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ require "logstash/outputs/base"
18
+ require "logstash/namespace"
19
+
20
+ # Summary: plugin to upload log events to Google BigQuery (BQ), rolling
21
+ # files based on the date pattern provided as a configuration setting. Events
22
+ # are written to files locally and, once file is closed, this plugin uploads
23
+ # it to the configured BigQuery dataset.
24
+ #
25
+ # VERY IMPORTANT:
26
+ # 1 - To make good use of BigQuery, your log events should be parsed and
27
+ # structured. Consider using grok to parse your events into fields that can
28
+ # be uploaded to BQ.
29
+ # 2 - You must configure your plugin so it gets events with the same structure,
30
+ # so the BigQuery schema suits them. In case you want to upload log events
31
+ # with different structures, you can utilize multiple configuration blocks,
32
+ # separating different log events with Logstash conditionals. More details on
33
+ # Logstash conditionals can be found here:
34
+ # http://logstash.net/docs/1.2.1/configuration#conditionals
35
+ #
36
+ # For more info on Google BigQuery, please go to:
37
+ # https://developers.google.com/bigquery/
38
+ #
39
+ # In order to use this plugin, a Google service account must be used. For
40
+ # more information, please refer to:
41
+ # https://developers.google.com/storage/docs/authentication#service_accounts
42
+ #
43
+ # Recommendations:
44
+
45
+ # a - Experiment with the settings depending on how much log data you generate,
46
+ # your needs to see "fresh" data, and how much data you could lose in the event
47
+ # of crash. For instance, if you want to see recent data in BQ quickly, you
48
+ # could configure the plugin to upload data every minute or so (provided you
49
+ # have enough log events to justify that). Note also, that if uploads are too
50
+ # frequent, there is no guarantee that they will be imported in the same order,
51
+ # so later data may be available before earlier data.
52
+
53
+ # b - BigQuery charges for storage and for queries, depending on how much data
54
+ # it reads to perform a query. These are other aspects to consider when
55
+ # considering the date pattern which will be used to create new tables and also
56
+ # how to compose the queries when using BQ. For more info on BigQuery Pricing,
57
+ # please access:
58
+ # https://developers.google.com/bigquery/pricing
59
+ #
60
+ # USAGE:
61
+ # This is an example of logstash config:
62
+ #
63
+ # output {
64
+ # google_bigquery {
65
+ # project_id => "folkloric-guru-278" (required)
66
+ # dataset => "logs" (required)
67
+ # csv_schema => "path:STRING,status:INTEGER,score:FLOAT" (required)
68
+ # key_path => "/path/to/privatekey.p12" (required)
69
+ # key_password => "notasecret" (optional)
70
+ # service_account => "1234@developer.gserviceaccount.com" (required)
71
+ # temp_directory => "/tmp/logstash-bq" (optional)
72
+ # temp_file_prefix => "logstash_bq" (optional)
73
+ # date_pattern => "%Y-%m-%dT%H:00" (optional)
74
+ # flush_interval_secs => 2 (optional)
75
+ # uploader_interval_secs => 60 (optional)
76
+ # deleter_interval_secs => 60 (optional)
77
+ # }
78
+ # }
79
+ #
80
+ # Improvements TODO list:
81
+ # - Refactor common code between Google BQ and GCS plugins.
82
+ # - Turn Google API code into a Plugin Mixin (like AwsConfig).
83
+ # - There's no recover method, so if logstash/plugin crashes, files may not
84
+ # be uploaded to BQ.
85
+ class LogStash::Outputs::GoogleBigQuery < LogStash::Outputs::Base
86
+ config_name "google_bigquery"
87
+ milestone 1
88
+
89
+ # Google Cloud Project ID (number, not Project Name!).
90
+ config :project_id, :validate => :string, :required => true
91
+
92
+ # BigQuery dataset to which these events will be added to.
93
+ config :dataset, :validate => :string, :required => true
94
+
95
+ # BigQuery table ID prefix to be used when creating new tables for log data.
96
+ # Table name will be <table_prefix>_<date>
97
+ config :table_prefix, :validate => :string, :default => "logstash"
98
+
99
+ # Schema for log data. It must follow this format:
100
+ # <field1-name>:<field1-type>,<field2-name>:<field2-type>,...
101
+ # Example: path:STRING,status:INTEGER,score:FLOAT
102
+ config :csv_schema, :validate => :string, :required => true
103
+
104
+ # Path to private key file for Google Service Account.
105
+ config :key_path, :validate => :string, :required => true
106
+
107
+ # Private key password for service account private key.
108
+ config :key_password, :validate => :string, :default => "notasecret"
109
+
110
+ # Service account to access Google APIs.
111
+ config :service_account, :validate => :string, :required => true
112
+
113
+ # Directory where temporary files are stored.
114
+ # Defaults to /tmp/logstash-bq-<random-suffix>
115
+ config :temp_directory, :validate => :string, :default => ""
116
+
117
+ # Temporary local file prefix. Log file will follow the format:
118
+ # <prefix>_hostname_date.part?.log
119
+ config :temp_file_prefix, :validate => :string, :default => "logstash_bq"
120
+
121
+ # Time pattern for BigQuery table, defaults to hourly tables.
122
+ # Must follow Time.strftime patterns: www.ruby-doc.org/core-2.0/Time.html#method-i-strftime
123
+ config :date_pattern, :validate => :string, :default => "%Y-%m-%dT%H:00"
124
+
125
+ # Flush interval in seconds for flushing writes to log files. 0 will flush
126
+ # on every message.
127
+ config :flush_interval_secs, :validate => :number, :default => 2
128
+
129
+ # Uploader interval when uploading new files to BigQuery. Adjust time based
130
+ # on your time pattern (for example, for hourly files, this interval can be
131
+ # around one hour).
132
+ config :uploader_interval_secs, :validate => :number, :default => 60
133
+
134
+ # Deleter interval when checking if upload jobs are done for file deletion.
135
+ # This only affects how long files are on the hard disk after the job is done.
136
+ config :deleter_interval_secs, :validate => :number, :default => 60
137
+
138
+ public
139
+ def register
140
+ require 'csv'
141
+ require "fileutils"
142
+ require "thread"
143
+
144
+ @logger.debug("BQ: register plugin")
145
+
146
+ @fields = Array.new
147
+
148
+ CSV.parse(@csv_schema.gsub('\"', '""')).flatten.each do |field|
149
+ temp = field.strip.split(":")
150
+
151
+ # Check that the field in the schema follows the format (<name>:<value>)
152
+ if temp.length != 2
153
+ raise "BigQuery schema must follow the format <field-name>:<field-value>"
154
+ end
155
+
156
+ @fields << { "name" => temp[0], "type" => temp[1] }
157
+ end
158
+
159
+ # Check that we have at least one field in the schema
160
+ if @fields.length == 0
161
+ raise "BigQuery schema must contain at least one field"
162
+ end
163
+
164
+ @json_schema = { "fields" => @fields }
165
+
166
+ @upload_queue = Queue.new
167
+ @delete_queue = Queue.new
168
+ @last_flush_cycle = Time.now
169
+ initialize_temp_directory()
170
+ initialize_current_log()
171
+ initialize_google_client()
172
+ initialize_uploader()
173
+ initialize_deleter()
174
+ end
175
+
176
  # Method called for each log event. It writes the event to the current output
  # file, flushing depending on flush interval configuration.
  public
  def receive(event)
    return unless output?(event)

    @logger.debug("BQ: receive method called", :event => event)

    # Message must be written as json
    message = event.to_json
    # Remove "@" from property names
    message = message.gsub(/\"@(\w+)\"/, '"\1"')

    new_base_path = get_base_path()

    # Time to roll file based on the date pattern? Or are we due to upload it to BQ?
    # NOTE(review): the uploader thread (initialize_uploader) may also close and
    # rotate @temp_file, and no lock guards either path — confirm there is no
    # race between this check and the uploader's rotation.
    if (@current_base_path != new_base_path || Time.now - @last_file_time >= @uploader_interval_secs)
      @logger.debug("BQ: log file will be closed and uploaded",
                    :filename => File.basename(@temp_file.to_path),
                    :size => @temp_file.size.to_s,
                    :uploader_interval_secs => @uploader_interval_secs.to_s)
      # Close alone does not guarantee that data is physically written to disk,
      # so flushing it before.
      @temp_file.fsync()
      @temp_file.close()
      initialize_next_log()
    end

    @temp_file.write(message)
    @temp_file.write("\n")

    # fsync at most once every flush_interval_secs (see sync_log_file).
    sync_log_file()

    @logger.debug("BQ: event appended to log file",
                  :filename => File.basename(@temp_file.to_path))
  end
212
+
213
+ public
214
+ def teardown
215
+ @logger.debug("BQ: teardown method called")
216
+
217
+ @temp_file.flush()
218
+ @temp_file.close()
219
+ end
220
+
221
  private
  ##
  # Flushes temporary log file every flush_interval_secs seconds or so.
  # This is triggered by events, but if there are no events there's no point
  # flushing files anyway.
  #
  # Inspired by lib/logstash/outputs/file.rb (flush(fd), flush_pending_files)
  def sync_log_file
    # NOTE(review): `flush_interval_secs` (no @) relies on an accessor being
    # defined for the config option; the rest of this file reads
    # @-prefixed ivars directly — confirm this name resolves.
    # A non-positive interval means fsync on every event.
    if flush_interval_secs <= 0
      @temp_file.fsync
      return
    end

    # Skip until the interval has elapsed since the previous fsync.
    return unless Time.now - @last_flush_cycle >= flush_interval_secs
    @temp_file.fsync
    @logger.debug("BQ: flushing file",
                  :path => @temp_file.to_path,
                  :fd => @temp_file)
    @last_flush_cycle = Time.now
  end
241
+
242
+ ##
243
+ # Creates temporary directory, if it does not exist.
244
+ #
245
+ # A random suffix is appended to the temporary directory
246
+ def initialize_temp_directory
247
+ if @temp_directory.empty?
248
+ require "stud/temporary"
249
+ @temp_directory = Stud::Temporary.directory("logstash-bq")
250
+ @logger.info("BQ: temporary directory generated",
251
+ :directory => @temp_directory)
252
+ end
253
+
254
+ if !(File.directory? @temp_directory)
255
+ @logger.debug("BQ: directory doesn't exist. Creating it.",
256
+ :directory => @temp_directory)
257
+ FileUtils.mkdir_p(@temp_directory)
258
+ end
259
+ end
260
+
261
+ ##
262
+ # Starts thread to delete uploaded log files once their jobs are done.
263
+ #
264
+ # Deleter is done in a separate thread, not holding the receive method above.
265
+ def initialize_deleter
266
+ @uploader = Thread.new do
267
+ @logger.debug("BQ: starting deleter")
268
+ while true
269
+ delete_item = @delete_queue.pop
270
+ job_id = delete_item["job_id"]
271
+ filename = delete_item["filename"]
272
+ job_status = get_job_status(job_id)
273
+ case job_status["state"]
274
+ when "DONE"
275
+ if job_status.has_key?("errorResult")
276
+ @logger.error("BQ: job failed, please enable debug and check full "\
277
+ "response (probably the issue is an incompatible "\
278
+ "schema). NOT deleting local file.",
279
+ :job_id => job_id,
280
+ :filename => filename,
281
+ :job_status => job_status)
282
+ else
283
+ @logger.debug("BQ: job is done, deleting local temporary file ",
284
+ :job_id => job_id,
285
+ :filename => filename,
286
+ :job_status => job_status)
287
+ File.delete(filename)
288
+ end
289
+ when "PENDING", "RUNNING"
290
+ @logger.debug("BQ: job is not done, NOT deleting local file yet.",
291
+ :job_id => job_id,
292
+ :filename => filename,
293
+ :job_status => job_status)
294
+ @delete_queue << delete_item
295
+ else
296
+ @logger.error("BQ: unknown job status, please enable debug and "\
297
+ "check full response (probably the issue is an "\
298
+ "incompatible schema). NOT deleting local file yet.",
299
+ :job_id => job_id,
300
+ :filename => filename,
301
+ :job_status => job_status)
302
+ end
303
+
304
+ sleep @deleter_interval_secs
305
+ end
306
+ end
307
+ end
308
+
309
  ##
  # Starts thread to upload log files.
  #
  # Uploader is done in a separate thread, not holding the receive method above.
  def initialize_uploader
    @uploader = Thread.new do
      @logger.debug("BQ: starting uploader")
      while true
        filename = @upload_queue.pop

        # Reenqueue if it is still the current file.
        if filename == @temp_file.to_path
          if @current_base_path == get_base_path()
            if Time.now - @last_file_time < @uploader_interval_secs
              @logger.debug("BQ: reenqueue as log file is being currently appended to.",
                            :filename => filename)
              @upload_queue << filename
              # If we got here, it means that older files were uploaded, so let's
              # wait another minute before checking on this file again.
              sleep @uploader_interval_secs
              next
            else
              # Interval elapsed: rotate so receive() writes to a fresh file.
              # NOTE(review): this closes @temp_file from a different thread
              # than receive(), with no lock — confirm thread safety.
              @logger.debug("BQ: flush and close file to be uploaded.",
                            :filename => filename)
              @temp_file.flush()
              @temp_file.close()
              initialize_next_log()
            end
          end
        end

        # Empty files carry no data: delete locally instead of uploading.
        if File.size(filename) > 0
          job_id = upload_object(filename)
          @delete_queue << { "filename" => filename, "job_id" => job_id }
        else
          @logger.debug("BQ: skipping empty file.")
          @logger.debug("BQ: delete local temporary file ",
                        :filename => filename)
          File.delete(filename)
        end

        sleep @uploader_interval_secs
      end
    end
  end
354
+
355
+ ##
356
+ # Returns undated path used to construct base path and final full path.
357
+ # This path only includes directory, prefix, and hostname info.
358
+ def get_undated_path
359
+ return @temp_directory + File::SEPARATOR + @temp_file_prefix + "_" +
360
+ Socket.gethostname()
361
+ end
362
+
363
+ ##
364
+ # Returns base path to log file that is invariant regardless of any
365
+ # user options.
366
+ def get_base_path
367
+ return get_undated_path() + "_" + Time.now.strftime(@date_pattern)
368
+ end
369
+
370
+ ##
371
+ # Returns full path to the log file based on global variables (like
372
+ # current_base_path) and configuration options (max file size).
373
+ def get_full_path
374
+ return @current_base_path + ".part" + ("%03d" % @size_counter) + ".log"
375
+ end
376
+
377
+ ##
378
+ # Returns date from a temporary log file name.
379
+ def get_date_pattern(filename)
380
+ match = /^#{get_undated_path()}_(?<date>.*)\.part(\d+)\.log$/.match(filename)
381
+ return match[:date]
382
+ end
383
+
384
+ ##
385
+ # Returns latest part number for a base path. This method checks all existing
386
+ # log files in order to find the highest part number, so this file can be used
387
+ # for appending log events.
388
+ #
389
+ # Only applicable if max file size is enabled.
390
+ def get_latest_part_number(base_path)
391
+ part_numbers = Dir.glob(base_path + ".part*.log").map do |item|
392
+ match = /^.*\.part(?<part_num>\d+).log$/.match(item)
393
+ next if match.nil?
394
+ match[:part_num].to_i
395
+ end
396
+
397
+ return part_numbers.max if part_numbers.any?
398
+ 0
399
+ end
400
+
401
  ##
  # Opens current log file and updates @temp_file with an instance of IOWriter.
  # This method also adds file to the upload queue.
  def open_current_file()
    path = get_full_path()
    # stat is nil when the file does not exist yet.
    stat = File.stat(path) rescue nil
    # NOTE(review): the fifo-on-JRuby special case mirrors
    # lib/logstash/outputs/file.rb — presumably working around File.new
    # behavior with named pipes under JRuby; confirm it is still needed.
    if stat and stat.ftype == "fifo" and RUBY_PLATFORM == "java"
      fd = java.io.FileWriter.new(java.io.File.new(path))
    else
      # Append mode so an existing part file is resumed, not truncated.
      fd = File.new(path, "a")
    end
    @temp_file = IOWriter.new(fd)
    @upload_queue << @temp_file.to_path
  end
415
+
416
+ ##
417
+ # Opens log file on plugin initialization, trying to resume from an existing
418
+ # file. If max file size is enabled, find the highest part number and resume
419
+ # from it.
420
+ def initialize_current_log
421
+ @current_base_path = get_base_path
422
+ @last_file_time = Time.now
423
+ @size_counter = get_latest_part_number(@current_base_path)
424
+ @logger.debug("BQ: resuming from latest part.",
425
+ :part => @size_counter)
426
+ open_current_file()
427
+ end
428
+
429
+ ##
430
+ # Generates new log file name based on configuration options and opens log
431
+ # file. If max file size is enabled, part number if incremented in case the
432
+ # the base log file name is the same (e.g. log file was not rolled given the
433
+ # date pattern).
434
+ def initialize_next_log
435
+ new_base_path = get_base_path
436
+ @size_counter = @current_base_path == new_base_path ? @size_counter + 1 : 0
437
+ @logger.debug("BQ: opening next log file.",
438
+ :filename => @current_base_path,
439
+ :part => @size_counter)
440
+ @current_base_path = new_base_path
441
+ @last_file_time = Time.now
442
+ open_current_file()
443
+ end
444
+
445
  ##
  # Initializes Google Client instantiating client and authorizing access:
  # discovers the BigQuery v2 API and authenticates the configured service
  # account with its PKCS12 key via the JWT assertion flow.
  def initialize_google_client
    require "google/api_client"
    require "openssl"

    @client = Google::APIClient.new(:application_name =>
                                    'Logstash Google BigQuery output plugin',
                                    :application_version => '0.1')
    @bq = @client.discovered_api('bigquery', 'v2')


    key = Google::APIClient::PKCS12.load_key(@key_path, @key_password)
    # Authorization scope reference:
    # https://developers.google.com/bigquery/docs/authorization
    service_account = Google::APIClient::JWTAsserter.new(@service_account,
                                                         'https://www.googleapis.com/auth/bigquery',
                                                         key)
    @client.authorization = service_account.authorize
  end
465
+
466
+ ##
467
+ # Uploads a local file to the configured bucket.
468
+ def get_job_status(job_id)
469
+ begin
470
+ require 'json'
471
+ @logger.debug("BQ: check job status.",
472
+ :job_id => job_id)
473
+ get_result = @client.execute(:api_method => @bq.jobs.get,
474
+ :parameters => {
475
+ 'jobId' => job_id,
476
+ 'projectId' => @project_id
477
+ })
478
+ response = JSON.parse(get_result.response.body)
479
+ @logger.debug("BQ: successfully invoked API.",
480
+ :response => response)
481
+
482
+ if response.has_key?("error")
483
+ raise response["error"]
484
+ end
485
+
486
+ # Successful invocation
487
+ contents = response["status"]
488
+ return contents
489
+ rescue => e
490
+ @logger.error("BQ: failed to check status", :exception => e)
491
+ # TODO(rdc): limit retries?
492
+ sleep 1
493
+ retry
494
+ end
495
+ end
496
+
497
+ ##
498
+ # Uploads a local file to the configured bucket.
499
+ def upload_object(filename)
500
+ begin
501
+ require 'json'
502
+ table_id = @table_prefix + "_" + get_date_pattern(filename)
503
+ # BQ does not accept anything other than alphanumeric and _
504
+ # Ref: https://developers.google.com/bigquery/browser-tool-quickstart?hl=en
505
+ table_id = table_id.gsub!(':','_').gsub!('-', '_')
506
+
507
+ @logger.debug("BQ: upload object.",
508
+ :filename => filename,
509
+ :table_id => table_id)
510
+ media = Google::APIClient::UploadIO.new(filename, "application/octet-stream")
511
+ body = {
512
+ "configuration" => {
513
+ "load" => {
514
+ "sourceFormat" => "NEWLINE_DELIMITED_JSON",
515
+ "schema" => @json_schema,
516
+ "destinationTable" => {
517
+ "projectId" => @project_id,
518
+ "datasetId" => @dataset,
519
+ "tableId" => table_id
520
+ },
521
+ 'createDisposition' => 'CREATE_IF_NEEDED',
522
+ 'writeDisposition' => 'WRITE_APPEND'
523
+ }
524
+ }
525
+ }
526
+ insert_result = @client.execute(:api_method => @bq.jobs.insert,
527
+ :body_object => body,
528
+ :parameters => {
529
+ 'uploadType' => 'multipart',
530
+ 'projectId' => @project_id
531
+ },
532
+ :media => media)
533
+
534
+ job_id = JSON.parse(insert_result.response.body)["jobReference"]["jobId"]
535
+ @logger.debug("BQ: multipart insert",
536
+ :job_id => job_id)
537
+ return job_id
538
+ rescue => e
539
+ @logger.error("BQ: failed to upload file", :exception => e)
540
+ # TODO(rdc): limit retries?
541
+ sleep 1
542
+ retry
543
+ end
544
+ end
545
+ end
546
+
547
##
# Wrapper class that abstracts which IO is being used (for instance, regular
# files or GzipWriter).
#
# Unknown methods are delegated to the wrapped IO; respond_to_missing? is
# implemented so respond_to? reports those delegated methods truthfully.
#
# Inspired by lib/logstash/outputs/file.rb.
class IOWriter
  def initialize(io)
    @io = io
  end

  # Forwards writes to the wrapped IO.
  def write(*args)
    @io.write(*args)
  end

  def flush
    @io.flush
  end

  # Delegate anything else (fsync, close, to_path, size, ...) to the IO.
  def method_missing(method_name, *args, &block)
    if @io.respond_to?(method_name)
      @io.send(method_name, *args, &block)
    else
      super
    end
  end

  # Required companion to method_missing so respond_to? covers delegation.
  def respond_to_missing?(method_name, include_private = false)
    @io.respond_to?(method_name, include_private) || super
  end

  attr_accessor :active
end