fluent-plugin-github-activities 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f6761994ec06ed9516a6b7c9abe3ec44f77bfdbe
4
+ data.tar.gz: f8709e6fa06915f13ea6a3890382f84769c38465
5
+ SHA512:
6
+ metadata.gz: 5e1251b591cfdc869b64f10f5f726cc5e637edb21be2c77350460d46ded5ef9c01e5a9d4160998aa983ab05eeac62ad82477d3b356c65fb828e83ee393fe41d7
7
+ data.tar.gz: cd5c50121621d5100c2b3f6aa0070fe306aaf4d72a1615b3d5da220b819549975b2d324a97850fcb654ce7aa04343a8f0712ead41dcd0b3570e138b4b3ddbec7
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
+ # -*- mode: ruby; coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
+ source "https://rubygems.org/"
21
+
22
+ gemspec
@@ -0,0 +1,87 @@
1
+ # fluent-plugin-github-activities
2
+
3
+ Provides the ability to watch public activities on GitHub.
4
+ This crawls GitHub activities of the specified users and forwards each activity as a record.
5
+
6
+ ## Supported activity types
7
+
8
+ * Activities related to commits
9
+ * `push`
10
+ * `commit` (See also following notes)
11
+ * `commit-comment`
12
+ * Activities related to repositories
13
+ * `fork`
14
+ * `branch`
15
+ * `tag`
16
+ * Activities related to issues
17
+ * `issue-open`
18
+ * `issue-close`
19
+ * `issue-reopen`
20
+ * `issue-assign`
21
+ * `issue-unassign`
22
+ * `issue-label`
23
+ * `issue-unlabel`
24
+ * Activities related to pull requests
25
+ * `pull-request`
26
+ * `pull-request-merged`
27
+ * `pull-request-cancelled`
28
+ * `pull-request-reopen`
29
+ * `pull-request-comment`
30
+
31
+ Each forwarded message is the same as the activity provided by GitHub.
32
+ See also [the API documentations of GitHub activity events](https://developer.github.com/v3/activity/events/).
33
+
34
+ Notes:
35
+
36
+ * Because a "push" activity doesn't include full information about each commit, [commits](https://developer.github.com/v3/git/commits/) are fetched and forwarded separately as pseudo `commit` activities.
37
+ * All forwarded records have an extra property `$github-activities-related-avatar`.
38
+ It will be useful to get the URI of the avatar image easily, for both activity events and commits.
39
+ * Unsupported activities are also forwarded with their raw event type like `StatusEvent`.
40
+
41
+
42
+ ## Configurations
43
+
44
+ ~~~
45
+ <source>
46
+ type github-activities
47
+
48
+ # Authentication settings.
49
+ # They are optional but strongly recommended to be configured,
50
+ # because there is a rate limit: 60 requests/hour by default.
51
+ # With an authenticated crawler, you can make 5000 requests/hour
52
+ # (about 80 requests/minute).
53
+ basic_username your-user-name-of-github
54
+ basic_password your-password-of-github
55
+
56
+ # Interval seconds for requests. This is `1` by default.
57
+ interval 1
58
+
59
+ # Path to a file to store timestamp of last crawled activity
60
+ # for each user. If you don't specify this option, same records
61
+ # can be forwarded after the fluentd is restarted.
62
+ pos_file /tmp/github-activities.json
63
+
64
+ # Base tag of forwarded records. It will be used as
65
+ # <base_tag>.<activity type>, like: "github-activity.push",
66
+ # "github-activity.StatusEvent", etc.
67
+ base_tag github-activity.
68
+
69
+ # The list of target users' account IDs on GitHub to be crawled.
70
+ users ashie,co-me,cosmo0920,hayamiz,hhatto,kenhys,kou
71
+ # External list is also available.
72
+ #users_list /path/to/list/of/users
73
+
74
+ # Merged pull requests will provide push and commit activities,
75
+ # so you possibly see same commits twice when a pull request by
76
+ # a known user (in the list above) is merged by another known user.
77
+ # To avoid such annoying duplicated records, they are ignored by
78
+ # default. If you want those records to be forwarded as well, set this
79
+ # option `true` manually.
80
+ #include_commits_from_pull_request true
81
+
82
+ # Pull requests can include commits by unknown users (out of the
83
+ # list above) and the crawler ignores such users' commits by default.
84
+ # To include those commit activities, set this option `true` manually.
85
+ #include_foreign_commits true
86
+ </source>
87
+ ~~~
@@ -0,0 +1,43 @@
1
+ # -*- mode: ruby; coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
# Gem specification of fluent-plugin-github-activities.
#
# Packages README.md, the Gemfile, this gemspec and every Ruby file under
# lib/; files under test/ are registered as test files.
Gem::Specification.new do |spec|
  spec.name = "fluent-plugin-github-activities"
  spec.version = "0.1.0"
  spec.authors = ["YUKI Hiroshi"]
  spec.email = ["yuki@clear-code.com"]
  spec.summary = "Fluentd plugin to crawl public activities on the GitHub."
  spec.description = "This provides the ability for fluentd to crawl " +
                     "public activities of users."
  # NOTE(review): this URL names fluent-plugin-groonga, not this gem; it
  # looks like a copy-and-paste leftover -- confirm the intended homepage.
  spec.homepage = "https://github.com/groonga/fluent-plugin-groonga"
  spec.license = "LGPL-3.0"

  spec.files = ["README.md", "Gemfile", "#{spec.name}.gemspec"]
  spec.files += Dir.glob("lib/**/*.rb")
  spec.test_files += Dir.glob("test/**/*")
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency("fluentd")

  spec.add_development_dependency("rake")
  spec.add_development_dependency("bundler")
  spec.add_development_dependency("packnga", ">= 1.0.1")
  spec.add_development_dependency("test-unit")
  spec.add_development_dependency("test-unit-notify")
end
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
+ require "fluent/plugin/github-activities/crawler"
21
+
22
module Fluent
  # Namespace of the GitHub activities crawler and its shared constants.
  module GithubActivities
    # Type of a queued request for the public events of a user.
    TYPE_EVENTS = :events
    # Type of a queued request for a single commit referenced by a push.
    TYPE_COMMIT = :commit
  end
end
@@ -0,0 +1,368 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
require "json"
require "net/https"
require "pathname"
require "time"
require "uri"
24
+
25
+ require "fluent/plugin/github-activities/safe_file_writer"
26
+
27
+ module Fluent
28
+ module GithubActivities
29
+ class Crawler
30
+ class EmptyRequestQueue < StandardError
31
+ end
32
+
33
+ NO_INTERVAL = 0
34
+ DEFAULT_INTERVAL = 1
35
+
36
+ DEFAULT_LAST_EVENT_TIMESTAMP = -1
37
+
38
+ RELATED_USER_IMAGE_KEY = "$github-activities-related-avatar"
39
+
40
+ attr_writer :on_emit
41
+ attr_reader :request_queue, :interval_for_next_request
42
+
43
# Builds a crawler and queues the first events request of every watched
# user. The avatar of each watched user is fetched here as well, so
# constructing a crawler with watched users performs HTTP requests.
#
# Keys read from +options+:
# * :username, :password - credentials used for HTTP basic authentication.
# * :watching_users - account names to crawl (an empty list by default).
# * :include_commits_from_pull_request - keep pushes that come from merged
#   pull requests.
# * :include_foreign_commits - emit commits of users outside the list.
# * :pos_file - path of the file that stores positions; kept as a Pathname.
# * :request_queue - queue to use instead of a fresh array.
# * :default_interval - interval after an ordinary request
#   (DEFAULT_INTERVAL by default).
def initialize(options={})
  @username = options[:username]
  @password = options[:password]
  @watching_users = options[:watching_users] || []
  @include_commits_from_pull_request = options[:include_commits_from_pull_request]
  @include_foreign_commits = options[:include_foreign_commits]

  pos_file = options[:pos_file]
  @pos_file = pos_file ? Pathname(pos_file) : pos_file
  @positions = {}
  load_positions

  @avatars = {}
  @request_queue = options[:request_queue] || []
  @default_interval = options[:default_interval] || DEFAULT_INTERVAL

  @watching_users.each do |watched_user|
    fetch_avatar(watched_user)
    reserve_user_events(watched_user)
  end
end
68
+
69
# Takes the next request from the queue and processes it.
#
# A request whose :process_after time has not been reached is pushed back
# to the end of the queue untouched. Otherwise the request is sent and the
# response is handled by its class:
# * success: the parsed JSON body is handed to process_user_events or
#   process_commit depending on the request type; for events the next poll
#   is queued and the returned ETag is saved.
# * 304 Not Modified: for events the next poll is queued with the entity
#   tag that was just sent.
# * anything else: an events request is queued again so that the user keeps
#   being watched after an error response. Other requests are dropped.
#
# @interval_for_next_request is set to NO_INTERVAL for postponed and
# not-modified requests and to the default interval otherwise.
#
# Returns false when the request was postponed, true otherwise.
# Raises EmptyRequestQueue when nothing is queued.
def process_request
  raise EmptyRequestQueue.new if @request_queue.empty?

  request = @request_queue.shift
  $log.info("GithubActivities::Crawler: processing request: #{request.inspect}")
  if request[:process_after] and
       Time.now.to_i < request[:process_after]
    @request_queue.push(request)
    @interval_for_next_request = NO_INTERVAL
    return false
  end

  uri = request_uri(request)
  extra_headers = extra_request_headers(request)

  $log.info("GithubActivities::Crawler: requesting to #{uri.inspect}")
  response = http_get(uri, extra_headers)
  $log.info("GithubActivities::Crawler: response: #{response.inspect}")

  case response
  when Net::HTTPSuccess
    body = JSON.parse(response.body)
    $log.info("GithubActivities::Crawler: request type: #{request[:type]}")
    case request[:type]
    when TYPE_EVENTS
      events = body
      $log.info("GithubActivities::Crawler: events size: #{events.size}")
      process_user_events(request[:user], events)
      reserve_user_events(request[:user], :previous_response => response)
      save_user_position(request[:user], :entity_tag => response["ETag"])
    when TYPE_COMMIT
      process_commit(body, request[:push])
    end
  when Net::HTTPNotModified
    case request[:type]
    when TYPE_EVENTS
      reserve_user_events(request[:user],
                          :previous_response => response,
                          :previous_entity_tag => extra_headers["If-None-Match"])
    end
    @interval_for_next_request = NO_INTERVAL
    return true
  else
    # Without this the events request would vanish from the queue and the
    # user would never be polled again after a single failed response.
    if request[:type] == TYPE_EVENTS
      $log.warn("GithubActivities::Crawler: unexpected response, " +
                "retrying later: #{response.inspect}")
      reserve_user_events(request[:user],
                          :previous_response => response,
                          :previous_entity_tag => extra_headers["If-None-Match"])
    end
  end
  @interval_for_next_request = @default_interval
  true
end
115
+
116
# Returns the URI to fetch for +request+: the public events URI of the
# requested user for an events request, the stored :uri for anything else.
def request_uri(request)
  if TYPE_EVENTS == request[:type]
    user_activities(request[:user])
  else
    request[:uri]
  end
end
125
+
126
# Builds the extra HTTP headers for +request+.
#
# The entity tag carried by the request itself wins; an events request
# without one falls back to the entity tag saved in the user's position.
# Returns a hash with "If-None-Match" set, or an empty hash when no entity
# tag is known.
def extra_request_headers(request)
  entity_tag = request[:previous_entity_tag]
  unless entity_tag
    position = request[:type] == TYPE_EVENTS && @positions[request[:user]]
    entity_tag = position["entity_tag"] if position
  end
  entity_tag ? { "If-None-Match" => entity_tag } : {}
end
136
+
137
# Appends an events request for +user+ to the request queue.
#
# When options[:previous_response] is given, the request is postponed by
# the "X-Poll-Interval" value of that response (counted from options[:now]
# or the current time) and carries the response's "ETag", falling back to
# options[:previous_entity_tag].
def reserve_user_events(user, options={})
  queued = { :type => TYPE_EVENTS, :user => user }
  previous = options[:previous_response]
  if previous
    base_time = options[:now] || Time.now
    queued[:previous_entity_tag] = previous["ETag"] || options[:previous_entity_tag]
    queued[:process_after] = base_time.to_i + previous["X-Poll-Interval"].to_i
  end
  @request_queue.push(queued)
end
153
+
154
# Processes every event of +user+ that is newer than the saved position.
#
# +events+ is the parsed list of event hashes. Events whose "created_at"
# is not after the saved "last_event_timestamp" are skipped; the
# threshold is read once, before the loop, so every new event of the batch
# is processed whatever the order of the list. Each processed event's
# timestamp is handed to save_user_position.
#
# Events without a "created_at" value are skipped instead of aborting the
# whole batch. Time.parse comes from the "time" standard library.
def process_user_events(user, events)
  position = @positions[user]
  last_event_timestamp = DEFAULT_LAST_EVENT_TIMESTAMP
  if position and position["last_event_timestamp"]
    last_event_timestamp = position["last_event_timestamp"]
  end
  events.each do |event|
    created_at = event["created_at"]
    next unless created_at
    timestamp = Time.parse(created_at).to_i
    next if timestamp <= last_event_timestamp
    process_user_event(user, event)
    save_user_position(user, :last_event_timestamp => timestamp)
  end
end
166
+
167
# Tags +event+ with the avatar of +user+ and dispatches it by its "type".
#
# Commit comments and forks are emitted directly; pushes, issues, issue
# comments, pull requests and create events go to their own handlers. Any
# other event is emitted with its raw type as the tag.
# See also: https://developer.github.com/v3/activity/events/types/
def process_user_event(user, event)
  event[RELATED_USER_IMAGE_KEY] = @avatars[user]
  type = event["type"]
  case type
  when "PushEvent"         then process_push_event(event)
  when "CommitCommentEvent" then emit("commit-comment", event)
  when "IssuesEvent"       then process_issue_event(event)
  when "IssueCommentEvent" then process_issue_or_pull_request_comment_event(event)
  when "ForkEvent"         then emit("fork", event)
  when "PullRequestEvent"  then process_pull_request_event(event)
  when "CreateEvent"       then process_create_event(event)
  else emit(type, event)
  end
end
189
+
190
# Queues one commit request per commit referenced by a push +event+.
#
# Pushes recognized as the result of a merged pull request are ignored
# unless @include_commits_from_pull_request is set. The requests are put
# at the front of the queue, keeping the order of the payload. The push
# itself is not emitted here: process_commit emits it once every
# referenced commit has been fetched.
def process_push_event(event)
  commit_refs = event["payload"]["commits"]
  from_pull_request = !@include_commits_from_pull_request &&
                      push_event_from_merged_pull_request?(event)
  return if from_pull_request

  commit_refs.reverse.each do |ref|
    commit_request = { :type => TYPE_COMMIT, :uri => ref["url"], :push => event }
    @request_queue.unshift(commit_request)
  end
end
204
+
205
# Handles a fetched +commit+ that belongs to +push_event+.
#
# The commit is emitted as "commit" when its author is a watched user or
# when @include_foreign_commits is set. It is then attached to the matching
# commit reference of the push payload (matched by "url"); once every
# reference of the push carries its commit, the push event is emitted as
# "push".
#
# The "author" of a commit may be null; such a commit has no user, so no
# avatar is looked up and it is treated as a foreign commit.
def process_commit(commit, push_event)
  author = commit["author"]
  user = author && author["login"]
  fetch_avatar(user) if user

  if @include_foreign_commits or watching_user?(user)
    commit[RELATED_USER_IMAGE_KEY] = @avatars[user]
    emit("commit", commit)
  end

  commit_refs = push_event["payload"]["commits"]
  target_commit_ref = commit_refs.find do |commit_ref|
    commit_ref["url"] == commit["url"]
  end
  target_commit_ref["commit"] = commit if target_commit_ref

  completely_fetched = commit_refs.all? do |commit_ref|
    commit_ref["commit"]
  end
  emit("push", push_event) if completely_fetched
end
225
+
226
# Tells whether +user+ is one of the watched users.
#
# Uses include? for the membership test; Array has no public "include"
# method, so the previous call raised NoMethodError.
def watching_user?(user)
  @watching_users.include?(user)
end
229
+
230
# Emits an issue +event+ under the tag that matches its payload "action".
# Events with any other action are not emitted.
def process_issue_event(event)
  tags_by_action = {
    "opened"     => "issue-open",
    "closed"     => "issue-close",
    "reopened"   => "issue-reopen",
    "assigned"   => "issue-assign",
    "unassigned" => "issue-unassign",
    "labeled"    => "issue-label",
    "unlabeled"  => "issue-unlabel"
  }
  tag = tags_by_action[event["payload"]["action"]]
  emit(tag, event) if tag
end
249
+
250
# Emits a pull request +event+ under a tag derived from its payload:
# "opened" and "reopened" map to "pull-request" and "pull-request-reopen";
# "closed" maps to "pull-request-merged" or "pull-request-cancelled"
# depending on the "merged" flag of the pull request. Events with any other
# action are not emitted.
def process_pull_request_event(event)
  payload = event["payload"]
  tag =
    case payload["action"]
    when "opened"
      "pull-request"
    when "closed"
      payload["pull_request"]["merged"] ? "pull-request-merged" : "pull-request-cancelled"
    when "reopened"
      "pull-request-reopen"
    end
  emit(tag, event) if tag
end
265
+
266
# Start of the message of a commit that merges a pull request, e.g.
# "Merge pull request #12 from user/branch" followed by a blank line.
MERGE_COMMIT_MESSAGE_PATTERN = /\AMerge pull request #\d+ from [^\/]+\/[^\/]+\n\n/

# Tells whether a push +event+ comes from a merged pull request, judged by
# the message of the last commit reference in its payload.
#
# Returns true or false. A push without any commit reference, or whose last
# reference has no message, is not treated as a merge.
def push_event_from_merged_pull_request?(event)
  commit_refs = event["payload"]["commits"] || []
  last_commit_ref = commit_refs.last
  return false unless last_commit_ref

  message = last_commit_ref["message"]
  return false unless message

  (MERGE_COMMIT_MESSAGE_PATTERN =~ message) ? true : false
end
278
+
279
# Emits an issue comment +event+: as "pull-request-comment" when the
# commented issue carries a "pull_request" entry, as "issue-comment"
# otherwise.
def process_issue_or_pull_request_comment_event(event)
  on_pull_request = event["payload"]["issue"]["pull_request"]
  tag = on_pull_request ? "pull-request-comment" : "issue-comment"
  emit(tag, event)
end
288
+
289
# Emits a create +event+ as "branch" or "tag" according to the "ref_type"
# of its payload. Create events of any other ref type are not emitted.
def process_create_event(event)
  tags_by_ref_type = { "branch" => "branch", "tag" => "tag" }
  tag = tags_by_ref_type[event["payload"]["ref_type"]]
  emit(tag, event) if tag
end
298
+
299
# Fetches the user information of +user+ and caches its "avatar_url" in
# @avatars. Nothing is requested when the user already has an entry.
#
# Returns the avatar URL that was just fetched, or nil when the user was
# already cached.
def fetch_avatar(user)
  unless @avatars.key?(user)
    profile = JSON.parse(http_get(user_info(user)).body)
    @avatars[user] = profile["avatar_url"]
  end
end
305
+
306
+ private
307
# Returns the API URI of the public events of +user+.
def user_activities(user)
  format("https://api.github.com/users/%s/events/public", user)
end
310
+
311
# Returns the API URI of the user information of +user+.
def user_info(user)
  format("https://api.github.com/users/%s", user)
end
314
+
315
# Logs +tag+ and +record+ at trace level and passes them to the @on_emit
# callback when one is set.
#
# Returns the result of the callback, or nil without a callback.
def emit(tag, record)
  $log.trace("GithubActivities::Crawler: emit => #{tag}, #{record.inspect}")
  return unless @on_emit

  @on_emit.call(tag, record)
end
319
+
320
# Sends a GET request to +uri+ and returns the response.
#
# +uri+ is a URI string (or anything URI() accepts); TLS is used for https
# URIs. +extra_headers+ is a hash of additional request headers. Basic
# authentication is added when both @username and @password are set.
#
# The request target is the URI's request_uri, i.e. the path together with
# the query string ("/" for an empty path), so query parameters are not
# lost.
#
# Returns the Net::HTTPResponse of the request.
def http_get(uri, extra_headers={})
  parsed_uri = URI(uri)
  connection = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
  connection.use_ssl = parsed_uri.is_a?(URI::HTTPS)
  # Net::HTTP#start returns the value of its block, here the response.
  connection.start do |session|
    http_request = Net::HTTP::Get.new(parsed_uri.request_uri, extra_headers)
    if @username and @password
      http_request.basic_auth(@username, @password)
    end
    session.request(http_request)
  end
end
334
+
335
# Loads @positions from the position file as parsed JSON.
#
# Nothing happens without a position file or when the file does not exist.
# Any error while reading or parsing resets @positions to an empty hash.
def load_positions
  begin
    if @pos_file && @pos_file.exist?
      @positions = JSON.parse(@pos_file.read)
    end
  rescue
    @positions = {}
  end
end
343
+
344
# Writes @positions to the position file as pretty-printed JSON through
# SafeFileWriter. Nothing is written when no position file is configured.
def save_positions
  if @pos_file
    serialized = JSON.pretty_generate(@positions)
    SafeFileWriter.write(@pos_file, serialized)
  end
end
348
+
349
# Updates the stored position of +user+ and saves all positions.
#
# +params+ may carry :entity_tag, stored as "entity_tag", and
# :last_event_timestamp, stored as "last_event_timestamp" only when it
# differs from DEFAULT_LAST_EVENT_TIMESTAMP and is newer than the stored
# value. The positions are saved on every call.
def save_user_position(user, params)
  position = (@positions[user] ||= {})

  entity_tag = params[:entity_tag]
  position["entity_tag"] = entity_tag if entity_tag

  timestamp = params[:last_event_timestamp]
  if timestamp && timestamp != DEFAULT_LAST_EVENT_TIMESTAMP
    known = position["last_event_timestamp"]
    if known.nil? || known < timestamp
      position["last_event_timestamp"] = timestamp
    end
  end

  save_positions
end
366
+ end
367
+ end
368
+ end