fluent-plugin-github-activities 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8133f90f59fd64a19ee93f670819992b65d3697c
4
- data.tar.gz: e7dea9ef571e9c2065107e7bec3358f34c47460e
3
+ metadata.gz: db6ee8ed9280b6965e10bd6ebf3248aba5a945de
4
+ data.tar.gz: 58e9f85025bcab3c4094203eb8ebca1133faed30
5
5
  SHA512:
6
- metadata.gz: 1f4921f84438dee4134f1685ce95da788eb731ed1fb2a67cc88dcde1a1be03c9a213b09dd74349e5068c1b0bb41719850a3896da82c039b4ff6a34f7bd18eb0e
7
- data.tar.gz: da531444b368936dec4c321efc61c174bd2ccd9575ea239c3f2a9c35ad0bb42cf5f63526e53058bf8c61de5ea3fced393e51a1340c24b4c320f1bdbb4ff5f43e
6
+ metadata.gz: 3a357bc0e21a4eda4069a8eee4df00e7fe7508de55cd51f9251e529d3d54ef27d3e7613d87e6ce8069afb19d2200aee458914038a9a41dc845d32541758290b7
7
+ data.tar.gz: 0177ef399a2d19968e3628c6ad3686c2e2f1e99a581be729521c85977ca1e609136c8b5742c0bab43d985a8f984be122749fe05d31d55df6a7c9d45d1861b175
data/README.md CHANGED
@@ -44,6 +44,7 @@ Notes:
44
44
 
45
45
  The configuration item `access_token` is optional, but configuring it is strongly recommended because there is a rate limit: 60 requests/hour by default.
46
46
  With an authenticated crawler, you can make 5000 requests/hour (about 80 requests/minute).
47
+ See also [the guide to creating a new access token](https://help.github.com/articles/creating-an-access-token-for-command-line-use/).
47
48
 
48
49
  A new access token for your instance can be generated by a simple BASIC authentication, like:
49
50
 
@@ -72,6 +73,7 @@ Enter host password for user 'your-account':
72
73
  }
73
74
  ~~~
74
75
 
76
+ fluent-plugin-github-activities crawls only public activities, so you don't need to grant the token any extra permissions.
75
77
  Then the value of the `token` field is the access key to be written to the configuration file.
76
78
 
77
79
  ## Configurations
@@ -86,6 +88,9 @@ Then the value of the `token` field is the access key to be written to the confi
86
88
  # Interval seconds for requests. This is `1` by default.
87
89
  interval 1
88
90
 
91
+ # Number of clients (concurrent crawler threads). This is `4` by default.
92
+ clients 1
93
+
89
94
  # Path to a file to store timestamp of last crawled activity
90
95
  # for each user. If you don't specify this option, same records
91
96
  # can be forwarded after the fluentd is restarted.
@@ -19,11 +19,11 @@
19
19
 
20
20
  Gem::Specification.new do |spec|
21
21
  spec.name = "fluent-plugin-github-activities"
22
- spec.version = "0.4.0"
22
+ spec.version = "0.5.0"
23
23
  spec.authors = ["YUKI Hiroshi"]
24
24
  spec.email = ["yuki@clear-code.com"]
25
25
  spec.summary = "Fluentd plugin to crawl public activities on the GitHub."
26
- spec.description = "This provides ability you fluentd to crawl public " +
26
+ spec.description = "This provides ability to crawl public " +
27
27
  "activities of users."
28
28
  spec.homepage = "https://github.com/groonga/fluent-plugin-groonga"
29
29
  spec.license = "LGPL-3.0"
@@ -17,6 +17,7 @@
17
17
  # License along with fluent-plugin-github-activities. If not, see
18
18
  # <http://www.gnu.org/licenses/>.
19
19
 
20
+ require "fluent/plugin/github-activities/users_manager"
20
21
  require "fluent/plugin/github-activities/crawler"
21
22
 
22
23
  module Fluent
@@ -20,10 +20,9 @@
20
20
  require "uri"
21
21
  require "net/https"
22
22
  require "json"
23
- require "pathname"
24
23
  require "time"
25
24
 
26
- require "fluent/plugin/github-activities/safe_file_writer"
25
+ require "fluent/plugin/github-activities/users_manager"
27
26
 
28
27
  module Fluent
29
28
  module GithubActivities
@@ -34,15 +33,17 @@ module Fluent
34
33
  NO_INTERVAL = 0
35
34
  DEFAULT_INTERVAL = 1
36
35
 
37
- DEFAULT_LAST_EVENT_TIMESTAMP = -1
38
-
39
36
  RELATED_USER_IMAGE_KEY = "$github-activities-related-avatar"
40
37
  RELATED_ORGANIZATION_IMAGE_KEY = "$github-activities-related-organization-logo"
38
+ RELATED_EVENT = "$github-activities-related-event"
41
39
 
42
40
  attr_writer :on_emit
43
41
  attr_reader :request_queue, :interval_for_next_request
44
42
 
45
43
  def initialize(options={})
44
+ @users_manager = UsersManager.new(:users => options[:watching_users],
45
+ :pos_file => options[:pos_file])
46
+
46
47
  @access_token = options[:access_token]
47
48
 
48
49
  @watching_users = options[:watching_users] || []
@@ -50,25 +51,14 @@ module Fluent
50
51
  @include_commits_from_pull_request = options[:include_commits_from_pull_request]
51
52
  @include_foreign_commits = options[:include_foreign_commits]
52
53
 
53
- @positions = {}
54
- @pos_file = options[:pos_file]
55
- @pos_file = Pathname(@pos_file) if @pos_file
56
- load_positions
57
-
58
54
  @request_queue = options[:request_queue] || []
59
55
 
60
56
  @default_interval = options[:default_interval] || DEFAULT_INTERVAL
61
-
62
- @watching_users.each do |user|
63
- reserve_user_events(user)
64
- end
65
57
  end
66
58
 
67
59
  def process_request
68
- raise EmptyRequestQueue.new if @request_queue.empty?
69
-
70
60
  request = @request_queue.shift
71
- $log.info("GithubActivities::Crawler: processing request: #{request.inspect}") if $log
61
+ $log.debug("GithubActivities::Crawler: processing request: #{request.inspect}") if $log
72
62
  if request[:process_after] and
73
63
  Time.now.to_i < request[:process_after]
74
64
  @request_queue.push(request)
@@ -79,34 +69,36 @@ module Fluent
79
69
  uri = request_uri(request)
80
70
  extra_headers = extra_request_headers(request)
81
71
 
82
- $log.info("GithubActivities::Crawler: requesting to #{uri.inspect}") if $log
72
+ $log.debug("GithubActivities::Crawler: requesting to #{uri.inspect}") if $log
83
73
  response = http_get(uri, extra_headers)
84
- $log.info("GithubActivities::Crawler: response: #{response.inspect}") if $log
74
+ $log.debug("GithubActivities::Crawler: response: #{response.inspect}") if $log
85
75
 
86
76
  case response
87
77
  when Net::HTTPSuccess
78
+ $log.trace("GithubActivities::Crawler: Net::HTTPSuccess / request type: #{request[:type]}") if $log
88
79
  body = JSON.parse(response.body)
89
- $log.info("GithubActivities::Crawler: request type: #{request[:type]}") if $log
90
80
  case request[:type]
91
81
  when TYPE_EVENTS
92
82
  events = body
93
- $log.info("GithubActivities::Crawler: events size: #{events.size}") if $log
83
+ $log.trace("GithubActivities::Crawler: events size: #{events.size}") if $log
94
84
  process_user_events(request[:user], events)
95
85
  reserve_user_events(request[:user], :previous_response => response)
96
- save_user_position(request[:user], :entity_tag => response["ETag"])
86
+ @users_manager.save_position_for(request[:user], :entity_tag => response["ETag"])
97
87
  when TYPE_COMMIT
98
88
  process_commit(body, request[:push])
99
89
  end
100
90
  when Net::HTTPNotModified
91
+ $log.trace("GithubActivities::Crawler: Net::HTTPNotModified / request type: #{request[:type]}") if $log
101
92
  case request[:type]
102
93
  when TYPE_EVENTS
103
94
  reserve_user_events(request[:user],
104
95
  :previous_response => response,
105
96
  :previous_entity_tag => extra_headers["If-None-Match"])
106
97
  end
107
- @interval_for_next_request = NO_INTERVAL
98
+ @interval_for_next_request = @default_interval
108
99
  return true
109
- when Net::HTTPNotFound
100
+ else
101
+ $log.trace("GithubActivities::Crawler: UnknownType / request type: #{request[:type]}") if $log
110
102
  case request[:type]
111
103
  when TYPE_COMMIT
112
104
  fake_body = {
@@ -118,6 +110,8 @@ module Fluent
118
110
  end
119
111
  @interval_for_next_request = @default_interval
120
112
  return true
113
+ rescue StandardError => error
114
+ $log.error(error.inspect)
121
115
  end
122
116
 
123
117
  def request_uri(request)
@@ -134,35 +128,28 @@ module Fluent
134
128
  headers = {}
135
129
  if request[:previous_entity_tag]
136
130
  headers["If-None-Match"] = request[:previous_entity_tag]
137
- elsif request[:type] == TYPE_EVENTS and @positions[request[:user]]
138
- entity_tag = @positions[request[:user]]["entity_tag"]
139
- headers["If-None-Match"] = entity_tag if entity_tag
131
+ elsif request[:type] == TYPE_EVENTS
132
+ position = @users_manager.position_for(request[:user])
133
+ if position
134
+ entity_tag = position["entity_tag"]
135
+ headers["If-None-Match"] = entity_tag if entity_tag
136
+ end
140
137
  end
141
138
  headers
142
139
  end
143
140
 
144
141
  def reserve_user_events(user, options={})
145
- request = {
146
- :type => TYPE_EVENTS,
147
- :user => user,
148
- }
149
- response = options[:previous_response]
150
- if response
151
- now = options[:now] || Time.now
152
- interval = response["X-Poll-Interval"].to_i
153
- time_to_process = now.to_i + interval
154
- request[:previous_entity_tag] = response["ETag"] ||
155
- options[:previous_entity_tag]
156
- request[:process_after] = time_to_process
157
- end
142
+ request = @users_manager.new_events_request(user, options)
158
143
  @request_queue.push(request)
159
144
  end
160
145
 
161
146
  def process_user_events(user, events)
162
- last_event_timestamp = DEFAULT_LAST_EVENT_TIMESTAMP
163
- if @positions[user] and @positions[user]["last_event_timestamp"]
164
- last_event_timestamp = @positions[user]["last_event_timestamp"]
147
+ last_event_timestamp = UsersManager::DEFAULT_LAST_EVENT_TIMESTAMP
148
+ position = @users_manager.position_for(user)
149
+ if position and position["last_event_timestamp"]
150
+ last_event_timestamp = position["last_event_timestamp"]
165
151
  end
152
+
166
153
  events = events.sort do |a, b|
167
154
  b["created_at"] <=> a["created_at"]
168
155
  end
@@ -170,7 +157,7 @@ module Fluent
170
157
  timestamp = Time.parse(event["created_at"]).to_i
171
158
  next if timestamp <= last_event_timestamp
172
159
  process_user_event(user, event)
173
- save_user_position(user, :last_event_timestamp => timestamp)
160
+ @users_manager.save_position_for(user, :last_event_timestamp => timestamp)
174
161
  end
175
162
  end
176
163
 
@@ -198,6 +185,8 @@ module Fluent
198
185
  else
199
186
  emit(event["type"], event)
200
187
  end
188
+ rescue StandardError => error
189
+ $log.exception(error)
201
190
  end
202
191
 
203
192
  def process_push_event(event)
@@ -208,16 +197,16 @@ module Fluent
208
197
  return
209
198
  end
210
199
  commit_refs.reverse.each do |commit_ref|
211
- @request_queue.unshift(:type => TYPE_COMMIT,
212
- :uri => commit_ref["url"],
213
- :sha => commit_ref["sha"],
214
- :push => event)
200
+ @request_queue.push(:type => TYPE_COMMIT,
201
+ :uri => commit_ref["url"],
202
+ :sha => commit_ref["sha"],
203
+ :push => event)
215
204
  end
216
205
  # emit("push", event)
217
206
  end
218
207
 
219
208
  def process_commit(commit, push_event)
220
- $log.info("GithubActivities::Crawler: processing commit #{commit["sha"]}") if $log
209
+ $log.debug("GithubActivities::Crawler: processing commit #{commit["sha"]}") if $log
221
210
  user = commit["author"]["login"]
222
211
 
223
212
  if user and (@include_foreign_commits or watching_user?(user))
@@ -225,6 +214,7 @@ module Fluent
225
214
  if push_event["org"]
226
215
  commit[RELATED_ORGANIZATION_IMAGE_KEY] = push_event["org"]["avatar_url"]
227
216
  end
217
+ commit[RELATED_EVENT] = push_event
228
218
  emit("commit", commit)
229
219
  end
230
220
 
@@ -341,38 +331,6 @@ module Fluent
341
331
  end
342
332
  response
343
333
  end
344
-
345
- def load_positions
346
- return unless @pos_file
347
- return unless @pos_file.exist?
348
-
349
- @positions = JSON.parse(@pos_file.read)
350
- rescue
351
- @positions = {}
352
- end
353
-
354
- def save_positions
355
- return unless @pos_file
356
- SafeFileWriter.write(@pos_file, JSON.pretty_generate(@positions))
357
- end
358
-
359
- def save_user_position(user, params)
360
- @positions[user] ||= {}
361
-
362
- if params[:entity_tag]
363
- @positions[user]["entity_tag"] = params[:entity_tag]
364
- end
365
-
366
- if params[:last_event_timestamp] and
367
- params[:last_event_timestamp] != DEFAULT_LAST_EVENT_TIMESTAMP
368
- old_timestamp = @positions[user]["last_event_timestamp"]
369
- if old_timestamp.nil? or old_timestamp < params[:last_event_timestamp]
370
- @positions[user]["last_event_timestamp"] = params[:last_event_timestamp]
371
- end
372
- end
373
-
374
- save_positions
375
- end
376
334
  end
377
335
  end
378
336
  end
@@ -0,0 +1,103 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
+ require "pathname"
21
+ require "json"
22
+
23
+ require "fluent/plugin/github-activities/safe_file_writer"
24
+
25
+ module Fluent
26
+ module GithubActivities
27
+ class UsersManager
28
+ DEFAULT_LAST_EVENT_TIMESTAMP = -1
29
+
30
+ def initialize(params={})
31
+ @users = params[:users]
32
+
33
+ @positions = {}
34
+ @pos_file = params[:pos_file]
35
+ @pos_file = Pathname(@pos_file) if @pos_file
36
+ end
37
+
38
+ def generate_initial_requests
39
+ @users.collect do |user|
40
+ new_events_request(user)
41
+ end
42
+ end
43
+
44
+ def new_events_request(user, options={})
45
+ request = {
46
+ :type => TYPE_EVENTS,
47
+ :user => user,
48
+ }
49
+ response = options[:previous_response]
50
+ if response
51
+ now = options[:now] || Time.now
52
+ interval = response["X-Poll-Interval"].to_i
53
+ time_to_process = now.to_i + interval
54
+ request[:previous_entity_tag] = response["ETag"] ||
55
+ options[:previous_entity_tag]
56
+ request[:process_after] = time_to_process
57
+ else
58
+ request[:previous_entity_tag] = options[:previous_entity_tag]
59
+ end
60
+ request
61
+ end
62
+
63
+ def position_for(user)
64
+ load_positions
65
+ @positions[user]
66
+ end
67
+
68
+ def save_position_for(user, params)
69
+ load_positions
70
+ @positions[user] ||= {}
71
+
72
+ if params[:entity_tag]
73
+ @positions[user]["entity_tag"] = params[:entity_tag]
74
+ end
75
+
76
+ if params[:last_event_timestamp] and
77
+ params[:last_event_timestamp] != DEFAULT_LAST_EVENT_TIMESTAMP
78
+ old_timestamp = @positions[user]["last_event_timestamp"]
79
+ if old_timestamp.nil? or old_timestamp < params[:last_event_timestamp]
80
+ @positions[user]["last_event_timestamp"] = params[:last_event_timestamp]
81
+ end
82
+ end
83
+
84
+ save_positions
85
+ end
86
+
87
+ private
88
+ def load_positions
89
+ return unless @pos_file
90
+ return unless @pos_file.exist?
91
+
92
+ @positions = JSON.parse(@pos_file.read)
93
+ rescue
94
+ @positions = {}
95
+ end
96
+
97
+ def save_positions
98
+ return unless @pos_file
99
+ SafeFileWriter.write(@pos_file, JSON.pretty_generate(@positions))
100
+ end
101
+ end
102
+ end
103
+ end
@@ -20,6 +20,7 @@
20
20
  module Fluent
21
21
  class GithubActivitiesInput < Input
22
22
  DEFAULT_BASE_TAG = "github-activity"
23
+ DEFAULT_CLIENTS = 4
23
24
 
24
25
  Plugin.register_input("github-activities", self)
25
26
 
@@ -30,6 +31,7 @@ module Fluent
30
31
  config_param :include_foreign_commits, :bool, :default => false
31
32
  config_param :base_tag, :string, :default => DEFAULT_BASE_TAG
32
33
  config_param :pos_file, :string, :default => nil
34
+ config_param :clients, :integer, :default => DEFAULT_CLIENTS
33
35
  config_param :interval, :integer, :default => 1
34
36
 
35
37
  def initialize
@@ -42,29 +44,49 @@ module Fluent
42
44
 
43
45
  def start
44
46
  @base_tag = @base_tag.sub(/\.\z/, "")
45
- @thread = Thread.new do
46
- crawler_options = {
47
- :access_token => @access_token,
48
- :watching_users => prepare_users_list,
49
- :include_commits_from_pull_request => @include_commits_from_pull_request,
50
- :include_foreign_commits => @include_foreign_commits,
51
- :pos_file => @pos_file,
52
- :default_interval => @interval,
53
- }
54
- @crawler = ::Fluent::GithubActivities::Crawler.new(crawler_options)
55
- @crawler.on_emit = lambda do |tag, record|
56
- Engine.emit("#{@base_tag}.#{tag}", Engine.now, record)
57
- end
58
47
 
59
- loop do
60
- @crawler.process_request
61
- sleep(@crawler.interval_for_next_request)
48
+ users = prepare_users_list
49
+ n_clients = [@clients, users.size].min
50
+ @interval = @interval * n_clients
51
+
52
+ @client_threads = []
53
+ @request_queue = Queue.new
54
+
55
+ users_manager_params = {
56
+ :users => users,
57
+ :pos_file => @pos_file,
58
+ }
59
+ users_manager = ::Fluent::GithubActivities::UsersManager.new(users_manager_params)
60
+ users_manager.generate_initial_requests.each do |request|
61
+ @request_queue.push(request)
62
+ end
63
+
64
+ n_clients.times do
65
+ @client_threads << Thread.new do
66
+ crawler_options = {
67
+ :access_token => @access_token,
68
+ :watching_users => users,
69
+ :include_commits_from_pull_request => @include_commits_from_pull_request,
70
+ :include_foreign_commits => @include_foreign_commits,
71
+ :pos_file => @pos_file,
72
+ :request_queue => @request_queue,
73
+ :default_interval => @interval,
74
+ }
75
+ crawler = ::Fluent::GithubActivities::Crawler.new(crawler_options)
76
+ crawler.on_emit = lambda do |tag, record|
77
+ Engine.emit("#{@base_tag}.#{tag}", Engine.now, record)
78
+ end
79
+
80
+ loop do
81
+ crawler.process_request
82
+ sleep(crawler.interval_for_next_request)
83
+ end
62
84
  end
63
85
  end
64
86
  end
65
87
 
66
88
  def shutdown
67
- @thread.exit
89
+ @client_threads.each(&:exit)
68
90
  end
69
91
 
70
92
  private
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-github-activities
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - YUKI Hiroshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-02 00:00:00.000000000 Z
11
+ date: 2015-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description: This provides ability you fluentd to crawl public activities of users.
97
+ description: This provides ability to crawl public activities of users.
98
98
  email:
99
99
  - yuki@clear-code.com
100
100
  executables: []
@@ -107,6 +107,7 @@ files:
107
107
  - lib/fluent/plugin/github-activities.rb
108
108
  - lib/fluent/plugin/github-activities/crawler.rb
109
109
  - lib/fluent/plugin/github-activities/safe_file_writer.rb
110
+ - lib/fluent/plugin/github-activities/users_manager.rb
110
111
  - lib/fluent/plugin/in_github-activities.rb
111
112
  - test/fixture.rb
112
113
  - test/fixture/accept-pull-request-event.json