fluent-plugin-github-activities 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f6761994ec06ed9516a6b7c9abe3ec44f77bfdbe
4
+ data.tar.gz: f8709e6fa06915f13ea6a3890382f84769c38465
5
+ SHA512:
6
+ metadata.gz: 5e1251b591cfdc869b64f10f5f726cc5e637edb21be2c77350460d46ded5ef9c01e5a9d4160998aa983ab05eeac62ad82477d3b356c65fb828e83ee393fe41d7
7
+ data.tar.gz: cd5c50121621d5100c2b3f6aa0070fe306aaf4d72a1615b3d5da220b819549975b2d324a97850fcb654ce7aa04343a8f0712ead41dcd0b3570e138b4b3ddbec7
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
+ # -*- mode: ruby; coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
+ source "https://rubygems.org/"
21
+
22
+ gemspec
@@ -0,0 +1,87 @@
1
+ # fluent-plugin-github-activities
2
+
3
+ Provides ability to watch public activities on GitHub.
4
+ This crawls GitHub activities of specified users and forwards each activity as a record.
5
+
6
+ ## Supported activity types
7
+
8
+ * Activities related to commits
9
+ * `push`
10
+ * `commit` (See also following notes)
11
+ * `commit-comment`
12
+ * Activities related to repositories
13
+ * `fork`
14
+ * `branch`
15
+ * `tag`
16
+ * Activities related to issues
17
+ * `issue-open`
18
+ * `issue-close`
19
+ * `issue-reopen`
20
+ * `issue-assign`
21
+ * `issue-unassign`
22
+ * `issue-label`
23
+ * `issue-unlabel`
24
+ * Activities related to pull requests
25
+ * `pull-request`
26
+ * `pull-request-merged`
27
+ * `pull-request-cancelled`
28
+ * `pull-request-reopen`
29
+ * `pull-request-comment`
30
+
31
+ Each forwarded message is the same as the activity provided by GitHub.
32
+ See also [the API documentation of GitHub activity events](https://developer.github.com/v3/activity/events/).
33
+
34
+ Notes:
35
+
36
+ * Because a "push" activity doesn't include the full information of each commit, its [commits](https://developer.github.com/v3/git/commits/) are fetched and forwarded separately as pseudo `commit` activities.
37
+ * All forwarded records have an extra property `$github-activities-related-avatar`.
38
+ It will be useful to get the URI of the avatar image easily, for both activity events and commits.
39
+ * Unsupported activities are also forwarded with their raw event type like `StatusEvent`.
40
+
41
+
42
+ ## Configurations
43
+
44
+ ~~~
45
+ <source>
46
+ type github-activities
47
+
48
+ # Authentication settings.
49
+ # They are optional but strongly recommended to be configured,
50
+ # because there is a rate limit: 60 requests/hour by default.
51
+ # With an authenticated crawler, you can send 5000 requests/hour
52
+ # (that is, about 80 requests/minute).
53
+ basic_username your-user-name-of-github
54
+ basic_password your-password-of-github
55
+
56
+ # Interval seconds for requests. This is `1` by default.
57
+ interval 1
58
+
59
+ # Path to a file to store timestamp of last crawled activity
60
+ # for each user. If you don't specify this option, same records
61
+ # can be forwarded after the fluentd is restarted.
62
+ pos_file /tmp/github-activities.json
63
+
64
+ # Base tag of forwarded records. It will be used as
65
+ # <base_tag>.<activity type>, like: "github-activity.push",
66
+ # "github-activity.StatusEvent", etc.
67
+ base_tag github-activity.
68
+
69
+ # The list of target users' account IDs on GitHub to be crawled.
70
+ users ashie,co-me,cosmo0920,hayamiz,hhatto,kenhys,kou
71
+ # External list is also available.
72
+ #users_list /path/to/list/of/users
73
+
74
+ # Merged pull requests will provide push and commit activities,
75
+ # so you possibly see same commits twice when a pull request by
76
+ # a known user (in the list above) is merged by another known user.
77
+ # To avoid such annoying duplicated records, they are ignored by
78
+ # default. If you want those records to be forwarded too, set this
79
+ # option to `true` manually.
80
+ #include_commits_from_pull_request true
81
+
82
+ # Pull requests can include commits by unknown users (out of the
83
+ # list above) and the crawler ignores such users' commits by default.
84
+ # To include those commit activities, set this option to `true` manually.
85
+ #include_foreign_commits true
86
+ </source>
87
+ ~~~
@@ -0,0 +1,43 @@
1
+ # -*- mode: ruby; coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
# Gem specification for fluent-plugin-github-activities: package metadata,
# the list of packaged files, and runtime/development dependencies.
Gem::Specification.new do |spec|
  spec.name = "fluent-plugin-github-activities"
  spec.version = "0.1.0"
  spec.authors = ["YUKI Hiroshi"]
  spec.email = ["yuki@clear-code.com"]
  spec.summary = "Fluentd plugin to crawl public activities on the GitHub."
  spec.description = "This provides your fluentd with the ability to crawl " +
                     "public activities of users."
  # NOTE(review): this URL names fluent-plugin-groonga rather than this gem;
  # it looks like a copy/paste leftover -- confirm the intended homepage.
  spec.homepage = "https://github.com/groonga/fluent-plugin-groonga"
  spec.license = "LGPL-3.0"

  # Packaged files: fixed top-level files plus every Ruby file under lib/.
  spec.files = ["README.md", "Gemfile", "#{spec.name}.gemspec"]
  spec.files += Dir.glob("lib/**/*.rb")
  spec.test_files += Dir.glob("test/**/*")
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency("fluentd")

  spec.add_development_dependency("rake")
  spec.add_development_dependency("bundler")
  spec.add_development_dependency("packnga", ">= 1.0.1")
  spec.add_development_dependency("test-unit")
  spec.add_development_dependency("test-unit-notify")
end
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
+ require "fluent/plugin/github-activities/crawler"
21
+
22
+ module Fluent
23
+ module GithubActivities
24
+ TYPE_EVENTS = :events
25
+ TYPE_COMMIT = :commit
26
+ end
27
+ end
@@ -0,0 +1,368 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
require "uri"
require "net/https"
require "json"
require "pathname"
require "time"
24
+
25
+ require "fluent/plugin/github-activities/safe_file_writer"
26
+
27
+ module Fluent
28
+ module GithubActivities
29
+ class Crawler
30
+ class EmptyRequestQueue < StandardError
31
+ end
32
+
33
+ NO_INTERVAL = 0
34
+ DEFAULT_INTERVAL = 1
35
+
36
+ DEFAULT_LAST_EVENT_TIMESTAMP = -1
37
+
38
+ RELATED_USER_IMAGE_KEY = "$github-activities-related-avatar"
39
+
40
+ attr_writer :on_emit
41
+ attr_reader :request_queue, :interval_for_next_request
42
+
43
# Builds a crawler from an options hash.
#
# Recognized keys, all optional: :username and :password (used for HTTP
# basic authentication), :watching_users (defaults to an empty list),
# :include_commits_from_pull_request, :include_foreign_commits,
# :pos_file (converted to a Pathname when given), :request_queue
# (defaults to an empty array) and :default_interval (defaults to
# DEFAULT_INTERVAL).
#
# For each watched user the avatar is fetched and an events request is
# queued before the constructor returns.
def initialize(options={})
  @username, @password = options.values_at(:username, :password)

  @watching_users = options[:watching_users] || []

  @include_commits_from_pull_request = options[:include_commits_from_pull_request]
  @include_foreign_commits = options[:include_foreign_commits]

  # Positions start empty and are then overwritten from the position file.
  @positions = {}
  pos_file = options[:pos_file]
  @pos_file = pos_file ? Pathname(pos_file) : pos_file
  load_positions

  @avatars = {}
  @request_queue = options[:request_queue] || []
  @default_interval = options[:default_interval] || DEFAULT_INTERVAL

  @watching_users.each do |watched|
    fetch_avatar(watched)
    reserve_user_events(watched)
  end
end
68
+
69
# Takes the request at the head of the queue and processes it.
#
# A request whose :process_after time has not been reached yet is pushed
# back to the tail of the queue and false is returned. Otherwise the
# request is sent and true is returned:
#
# * successful events response: the events are processed, the next events
#   request for the user is queued and the response's ETag is saved;
# * successful commit response: the commit is processed;
# * 304 Not Modified for an events request: the next events request is
#   queued, reusing the entity tag that was sent;
# * any other response for an events request: a warning is logged and the
#   next events request is queued as well, so that a failed request does
#   not stop polling for that user.
#
# @interval_for_next_request is set to NO_INTERVAL for postponed and
# not-modified requests, and to the default interval otherwise.
#
# Raises EmptyRequestQueue when there is nothing to process.
def process_request
  raise EmptyRequestQueue if @request_queue.empty?

  request = @request_queue.shift
  $log.info("GithubActivities::Crawler: processing request: #{request.inspect}")
  if request[:process_after] && Time.now.to_i < request[:process_after]
    # Not due yet: rotate it to the tail of the queue.
    @request_queue.push(request)
    @interval_for_next_request = NO_INTERVAL
    return false
  end

  uri = request_uri(request)
  extra_headers = extra_request_headers(request)

  $log.info("GithubActivities::Crawler: requesting to #{uri.inspect}")
  response = http_get(uri, extra_headers)
  $log.info("GithubActivities::Crawler: response: #{response.inspect}")

  case response
  when Net::HTTPSuccess
    body = JSON.parse(response.body)
    $log.info("GithubActivities::Crawler: request type: #{request[:type]}")
    case request[:type]
    when TYPE_EVENTS
      events = body
      $log.info("GithubActivities::Crawler: events size: #{events.size}")
      process_user_events(request[:user], events)
      reserve_user_events(request[:user], :previous_response => response)
      save_user_position(request[:user], :entity_tag => response["ETag"])
    when TYPE_COMMIT
      process_commit(body, request[:push])
    end
  when Net::HTTPNotModified
    if request[:type] == TYPE_EVENTS
      reserve_user_events(request[:user],
                          :previous_response => response,
                          :previous_entity_tag => extra_headers["If-None-Match"])
    end
    @interval_for_next_request = NO_INTERVAL
    return true
  else
    $log.warn("GithubActivities::Crawler: unexpected response for " +
              "#{uri.inspect}: #{response.inspect}")
    # The request was already removed from the queue; without queueing a
    # new one the user would never be polled again.
    if request[:type] == TYPE_EVENTS
      reserve_user_events(request[:user],
                          :previous_response => response,
                          :previous_entity_tag => extra_headers["If-None-Match"])
    end
  end
  @interval_for_next_request = @default_interval
  true
end
115
+
116
# Returns the URI to fetch for +request+: the public events URI of
# request[:user] for events requests, request[:uri] for anything else.
def request_uri(request)
  return user_activities(request[:user]) if TYPE_EVENTS == request[:type]

  request[:uri]
end
125
+
126
# Builds the extra HTTP headers for +request+.
#
# "If-None-Match" is taken from request[:previous_entity_tag] when present;
# otherwise, for events requests, from the entity tag stored in the user's
# saved position. Returns an empty hash when no entity tag is known.
def extra_request_headers(request)
  entity_tag = request[:previous_entity_tag]
  if !entity_tag && request[:type] == TYPE_EVENTS
    position = @positions[request[:user]]
    entity_tag = position["entity_tag"] if position
  end
  entity_tag ? {"If-None-Match" => entity_tag} : {}
end
136
+
137
# Appends an events request for +user+ to the request queue.
#
# When options[:previous_response] is given, the new request carries
# :previous_entity_tag (the response's "ETag" header, falling back to
# options[:previous_entity_tag]) and :process_after, computed as
# options[:now] (default: the current time) plus the response's
# "X-Poll-Interval" header converted with to_i.
def reserve_user_events(user, options={})
  request = {:type => TYPE_EVENTS, :user => user}
  previous = options[:previous_response]
  if previous
    base_time = (options[:now] || Time.now).to_i
    request[:previous_entity_tag] = previous["ETag"] || options[:previous_entity_tag]
    request[:process_after] = base_time + previous["X-Poll-Interval"].to_i
  end
  @request_queue.push(request)
end
153
+
154
# Processes every event in +events+ that is newer than the last event
# timestamp saved for +user+.
#
# The timestamp of each event is the integer form of its "created_at"
# value. Events at or before the saved timestamp are skipped; each
# processed event updates the saved position right away.
#
# Time.parse comes from the "time" standard library, which this file must
# require.
def process_user_events(user, events)
  position = @positions[user]
  last_event_timestamp =
    (position && position["last_event_timestamp"]) || DEFAULT_LAST_EVENT_TIMESTAMP

  events.each do |event|
    timestamp = Time.parse(event["created_at"]).to_i
    next if timestamp <= last_event_timestamp

    process_user_event(user, event)
    save_user_position(user, :last_event_timestamp => timestamp)
  end
end
166
+
167
# Tags +event+ with the avatar stored for +user+ and dispatches it by its
# "type". Event types without a dedicated handler are emitted under their
# raw type name.
#
# see also: https://developer.github.com/v3/activity/events/types/
def process_user_event(user, event)
  event[RELATED_USER_IMAGE_KEY] = @avatars[user]

  type = event["type"]
  case type
  when "CommitCommentEvent" then emit("commit-comment", event)
  when "ForkEvent"          then emit("fork", event)
  when "PushEvent"          then process_push_event(event)
  when "IssuesEvent"        then process_issue_event(event)
  when "IssueCommentEvent"  then process_issue_or_pull_request_comment_event(event)
  when "PullRequestEvent"   then process_pull_request_event(event)
  when "CreateEvent"        then process_create_event(event)
  else                           emit(type, event)
  end
end
189
+
190
# Handles a push event by queueing one commit request per pushed commit.
#
# The commit requests are inserted at the head of the request queue in
# their original order. The "push" activity itself is emitted later, once
# all of its commits have been fetched.
#
# A push without any commits has nothing to fetch, so its "push" activity
# is emitted immediately; otherwise it would never be emitted.
#
# Unless @include_commits_from_pull_request is set, pushes that come from
# a merged pull request are ignored.
def process_push_event(event)
  commit_refs = event["payload"]["commits"] || []
  if commit_refs.empty?
    emit("push", event)
    return
  end

  if !@include_commits_from_pull_request &&
       push_event_from_merged_pull_request?(event)
    return
  end

  # Unshifting in reverse keeps the commits in their original order at the
  # head of the queue.
  commit_refs.reverse_each do |commit_ref|
    @request_queue.unshift(:type => TYPE_COMMIT,
                           :uri  => commit_ref["url"],
                           :push => event)
  end
end
204
+
205
# Handles a fetched commit that belongs to +push_event+.
#
# The commit is emitted as a "commit" activity, tagged with its author's
# avatar, when the author is a watched user or when
# @include_foreign_commits is set. The commit is then attached to the
# matching commit reference of the push event, and the "push" activity is
# emitted once every reference has its commit attached.
#
# commit["author"] may be nil; such a commit has no known user, so no
# avatar is fetched and it is treated as a foreign commit.
def process_commit(commit, push_event)
  author = commit["author"]
  user = author && author["login"]
  fetch_avatar(user) if user

  if @include_foreign_commits || (user && watching_user?(user))
    commit[RELATED_USER_IMAGE_KEY] = @avatars[user]
    emit("commit", commit)
  end

  commit_refs = push_event["payload"]["commits"]
  target_commit_ref = commit_refs.find do |commit_ref|
    commit_ref["url"] == commit["url"]
  end
  target_commit_ref["commit"] = commit if target_commit_ref

  completely_fetched = commit_refs.all? do |commit_ref|
    commit_ref["commit"]
  end
  emit("push", push_event) if completely_fetched
end
225
+
226
# Returns true when +user+ is one of the watched users.
def watching_user?(user)
  # Array has no #include method; the membership predicate is #include?.
  @watching_users.include?(user)
end
229
+
230
# Emits an issue activity named after the event's payload "action".
# Actions without an entry in the table below are ignored.
def process_issue_event(event)
  tags = {
    "opened"     => "issue-open",
    "closed"     => "issue-close",
    "reopened"   => "issue-reopen",
    "assigned"   => "issue-assign",
    "unassigned" => "issue-unassign",
    "labeled"    => "issue-label",
    "unlabeled"  => "issue-unlabel",
  }
  tag = tags[event["payload"]["action"]]
  emit(tag, event) if tag
end
249
+
250
# Emits a pull request activity for the event's payload "action".
#
# A closed pull request is reported as "pull-request-merged" when its
# "merged" flag is set and as "pull-request-cancelled" otherwise. Other
# actions than opened, closed and reopened are ignored.
def process_pull_request_event(event)
  payload = event["payload"]
  tag =
    case payload["action"]
    when "opened"   then "pull-request"
    when "reopened" then "pull-request-reopen"
    when "closed"
      payload["pull_request"]["merged"] ? "pull-request-merged" : "pull-request-cancelled"
    end
  emit(tag, event) if tag
end
265
+
266
# Matches the beginning of a commit message that starts with
# "Merge pull request #<number> from <owner>/<branch>" followed by a blank
# line.
MERGE_COMMIT_MESSAGE_PATTERN = /\AMerge pull request #\d+ from [^\/]+\/[^\/]+\n\n/

# Returns true when the last commit of the push event has a message that
# matches MERGE_COMMIT_MESSAGE_PATTERN, false otherwise. A push without
# commits is never treated as a merged pull request.
def push_event_from_merged_pull_request?(event)
  commit_refs = event["payload"]["commits"]
  return false if commit_refs.nil? || commit_refs.empty?

  # Regexp#=~ returns nil for a nil message, so a missing message is false.
  !!(MERGE_COMMIT_MESSAGE_PATTERN =~ commit_refs.last["message"])
end
278
+
279
# Emits a comment activity: "pull-request-comment" when the commented
# issue carries a "pull_request" entry, "issue-comment" otherwise.
def process_issue_or_pull_request_comment_event(event)
  on_pull_request = event["payload"]["issue"]["pull_request"]
  tag = on_pull_request ? "pull-request-comment" : "issue-comment"
  emit(tag, event)
end
288
+
289
# Emits a "branch" or "tag" activity for a create event, depending on the
# payload's "ref_type". Other ref types are ignored.
def process_create_event(event)
  ref_type = event["payload"]["ref_type"]
  emit(ref_type, event) if ["branch", "tag"].include?(ref_type)
end
298
+
299
# Fetches the user information of +user+ and remembers its "avatar_url".
# Nothing is requested when an avatar entry for the user already exists.
def fetch_avatar(user)
  return if @avatars.key?(user)

  profile = JSON.parse(http_get(user_info(user)).body)
  @avatars[user] = profile["avatar_url"]
end
305
+
306
+ private
307
# Returns the URI of the public events of +user+ on the GitHub API.
def user_activities(user)
  format("https://api.github.com/users/%s/events/public", user)
end
310
+
311
# Returns the URI of the user information of +user+ on the GitHub API.
def user_info(user)
  format("https://api.github.com/users/%s", user)
end
314
+
315
# Logs the record at trace level and hands it to the on_emit callback,
# if one has been assigned.
def emit(tag, record)
  $log.trace("GithubActivities::Crawler: emit => #{tag}, #{record.inspect}")
  return unless @on_emit

  @on_emit.call(tag, record)
end
319
+
320
# Sends a GET request to +uri+ with +extra_headers+ and returns the
# response object.
#
# TLS is used for https URIs. Basic authentication is added when both a
# username and a password are configured.
def http_get(uri, extra_headers={})
  parsed_uri = URI(uri)
  connection = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
  connection.use_ssl = parsed_uri.is_a?(URI::HTTPS)
  # With a block, Net::HTTP#start returns the value of the block.
  connection.start do |session|
    # request_uri keeps the query string and is "/" for a URI without a
    # path, where #path would drop the query and could be empty.
    http_request = Net::HTTP::Get.new(parsed_uri.request_uri, extra_headers)
    if @username && @password
      http_request.basic_auth(@username, @password)
    end
    session.request(http_request)
  end
end
334
+
335
# Loads the saved positions from the position file.
#
# Nothing happens when no position file is configured or when it does not
# exist. Loading is best effort: an unreadable file, invalid JSON, or JSON
# that is not an object all result in empty positions, because the rest
# of the crawler looks positions up by user name in a Hash.
def load_positions
  return unless @pos_file
  return unless @pos_file.exist?

  loaded = JSON.parse(@pos_file.read)
  @positions = loaded.is_a?(Hash) ? loaded : {}
rescue StandardError
  @positions = {}
end
343
+
344
# Writes the current positions to the position file as pretty-printed
# JSON. Does nothing when no position file is configured.
def save_positions
  SafeFileWriter.write(@pos_file, JSON.pretty_generate(@positions)) if @pos_file
end
348
+
349
# Updates the saved position of +user+ from +params+ and saves all
# positions.
#
# params[:entity_tag], when given, replaces the stored entity tag.
# params[:last_event_timestamp] replaces the stored timestamp only when it
# is not the default timestamp and is newer than the stored one.
def save_user_position(user, params)
  position = (@positions[user] ||= {})

  entity_tag = params[:entity_tag]
  position["entity_tag"] = entity_tag if entity_tag

  timestamp = params[:last_event_timestamp]
  if timestamp && timestamp != DEFAULT_LAST_EVENT_TIMESTAMP
    recorded = position["last_event_timestamp"]
    if recorded.nil? || recorded < timestamp
      position["last_event_timestamp"] = timestamp
    end
  end

  save_positions
end
366
+ end
367
+ end
368
+ end