fluent-plugin-github-activities 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8133f90f59fd64a19ee93f670819992b65d3697c
4
- data.tar.gz: e7dea9ef571e9c2065107e7bec3358f34c47460e
3
+ metadata.gz: db6ee8ed9280b6965e10bd6ebf3248aba5a945de
4
+ data.tar.gz: 58e9f85025bcab3c4094203eb8ebca1133faed30
5
5
  SHA512:
6
- metadata.gz: 1f4921f84438dee4134f1685ce95da788eb731ed1fb2a67cc88dcde1a1be03c9a213b09dd74349e5068c1b0bb41719850a3896da82c039b4ff6a34f7bd18eb0e
7
- data.tar.gz: da531444b368936dec4c321efc61c174bd2ccd9575ea239c3f2a9c35ad0bb42cf5f63526e53058bf8c61de5ea3fced393e51a1340c24b4c320f1bdbb4ff5f43e
6
+ metadata.gz: 3a357bc0e21a4eda4069a8eee4df00e7fe7508de55cd51f9251e529d3d54ef27d3e7613d87e6ce8069afb19d2200aee458914038a9a41dc845d32541758290b7
7
+ data.tar.gz: 0177ef399a2d19968e3628c6ad3686c2e2f1e99a581be729521c85977ca1e609136c8b5742c0bab43d985a8f984be122749fe05d31d55df6a7c9d45d1861b175
data/README.md CHANGED
@@ -44,6 +44,7 @@ Notes:
44
44
 
45
45
  The configuration item `access_token` is optional, but configuring it is strongly recommended because there is a rate limit: 60 requests/hour by default.
46
46
  With an authenticated crawler, you can make 5000 requests/hour (about 80 requests/minute).
47
+ See also [the guide to creating a new access token](https://help.github.com/articles/creating-an-access-token-for-command-line-use/).
47
48
 
48
49
  A new access token for your instance can be generated by a simple BASIC authentication, like:
49
50
 
@@ -72,6 +73,7 @@ Enter host password for user 'your-account':
72
73
  }
73
74
  ~~~
74
75
 
76
+ fluent-plugin-github-activities crawls only public activities, so you don't need to grant the token any extra permissions.
75
77
  Then the value of the `token` field is the access token to be written to the configuration file.
76
78
 
77
79
  ## Configurations
@@ -86,6 +88,9 @@ Then the value of the `token` field is the access key to be written to the confi
86
88
  # Interval seconds for requests. This is `1` by default.
87
89
  interval 1
88
90
 
91
+ # Number of crawler clients (threads) that run concurrently. This is `4` by default.
92
+ clients 1
93
+
89
94
  # Path to a file to store timestamp of last crawled activity
90
95
  # for each user. If you don't specify this option, the same records
91
96
  # may be forwarded again after fluentd is restarted.
@@ -19,11 +19,11 @@
19
19
 
20
20
  Gem::Specification.new do |spec|
21
21
  spec.name = "fluent-plugin-github-activities"
22
- spec.version = "0.4.0"
22
+ spec.version = "0.5.0"
23
23
  spec.authors = ["YUKI Hiroshi"]
24
24
  spec.email = ["yuki@clear-code.com"]
25
25
  spec.summary = "Fluentd plugin to crawl public activities on the GitHub."
26
- spec.description = "This provides ability you fluentd to crawl public " +
26
+ spec.description = "This provides ability to crawl public " +
27
27
  "activities of users."
28
28
  spec.homepage = "https://github.com/groonga/fluent-plugin-groonga"
29
29
  spec.license = "LGPL-3.0"
@@ -17,6 +17,7 @@
17
17
  # License along with fluent-plugin-github-activities. If not, see
18
18
  # <http://www.gnu.org/licenses/>.
19
19
 
20
+ require "fluent/plugin/github-activities/users_manager"
20
21
  require "fluent/plugin/github-activities/crawler"
21
22
 
22
23
  module Fluent
@@ -20,10 +20,9 @@
20
20
  require "uri"
21
21
  require "net/https"
22
22
  require "json"
23
- require "pathname"
24
23
  require "time"
25
24
 
26
- require "fluent/plugin/github-activities/safe_file_writer"
25
+ require "fluent/plugin/github-activities/users_manager"
27
26
 
28
27
  module Fluent
29
28
  module GithubActivities
@@ -34,15 +33,17 @@ module Fluent
34
33
  NO_INTERVAL = 0
35
34
  DEFAULT_INTERVAL = 1
36
35
 
37
- DEFAULT_LAST_EVENT_TIMESTAMP = -1
38
-
39
36
  RELATED_USER_IMAGE_KEY = "$github-activities-related-avatar"
40
37
  RELATED_ORGANIZATION_IMAGE_KEY = "$github-activities-related-organization-logo"
38
+ RELATED_EVENT = "$github-activities-related-event"
41
39
 
42
40
  attr_writer :on_emit
43
41
  attr_reader :request_queue, :interval_for_next_request
44
42
 
45
43
  def initialize(options={})
44
+ @users_manager = UsersManager.new(:users => options[:watching_users],
45
+ :pos_file => options[:pos_file])
46
+
46
47
  @access_token = options[:access_token]
47
48
 
48
49
  @watching_users = options[:watching_users] || []
@@ -50,25 +51,14 @@ module Fluent
50
51
  @include_commits_from_pull_request = options[:include_commits_from_pull_request]
51
52
  @include_foreign_commits = options[:include_foreign_commits]
52
53
 
53
- @positions = {}
54
- @pos_file = options[:pos_file]
55
- @pos_file = Pathname(@pos_file) if @pos_file
56
- load_positions
57
-
58
54
  @request_queue = options[:request_queue] || []
59
55
 
60
56
  @default_interval = options[:default_interval] || DEFAULT_INTERVAL
61
-
62
- @watching_users.each do |user|
63
- reserve_user_events(user)
64
- end
65
57
  end
66
58
 
67
59
  def process_request
68
- raise EmptyRequestQueue.new if @request_queue.empty?
69
-
70
60
  request = @request_queue.shift
71
- $log.info("GithubActivities::Crawler: processing request: #{request.inspect}") if $log
61
+ $log.debug("GithubActivities::Crawler: processing request: #{request.inspect}") if $log
72
62
  if request[:process_after] and
73
63
  Time.now.to_i < request[:process_after]
74
64
  @request_queue.push(request)
@@ -79,34 +69,36 @@ module Fluent
79
69
  uri = request_uri(request)
80
70
  extra_headers = extra_request_headers(request)
81
71
 
82
- $log.info("GithubActivities::Crawler: requesting to #{uri.inspect}") if $log
72
+ $log.debug("GithubActivities::Crawler: requesting to #{uri.inspect}") if $log
83
73
  response = http_get(uri, extra_headers)
84
- $log.info("GithubActivities::Crawler: response: #{response.inspect}") if $log
74
+ $log.debug("GithubActivities::Crawler: response: #{response.inspect}") if $log
85
75
 
86
76
  case response
87
77
  when Net::HTTPSuccess
78
+ $log.trace("GithubActivities::Crawler: Net::HTTPSuccess / request type: #{request[:type]}") if $log
88
79
  body = JSON.parse(response.body)
89
- $log.info("GithubActivities::Crawler: request type: #{request[:type]}") if $log
90
80
  case request[:type]
91
81
  when TYPE_EVENTS
92
82
  events = body
93
- $log.info("GithubActivities::Crawler: events size: #{events.size}") if $log
83
+ $log.trace("GithubActivities::Crawler: events size: #{events.size}") if $log
94
84
  process_user_events(request[:user], events)
95
85
  reserve_user_events(request[:user], :previous_response => response)
96
- save_user_position(request[:user], :entity_tag => response["ETag"])
86
+ @users_manager.save_position_for(request[:user], :entity_tag => response["ETag"])
97
87
  when TYPE_COMMIT
98
88
  process_commit(body, request[:push])
99
89
  end
100
90
  when Net::HTTPNotModified
91
+ $log.trace("GithubActivities::Crawler: Net::HTTPNotModified / request type: #{request[:type]}") if $log
101
92
  case request[:type]
102
93
  when TYPE_EVENTS
103
94
  reserve_user_events(request[:user],
104
95
  :previous_response => response,
105
96
  :previous_entity_tag => extra_headers["If-None-Match"])
106
97
  end
107
- @interval_for_next_request = NO_INTERVAL
98
+ @interval_for_next_request = @default_interval
108
99
  return true
109
- when Net::HTTPNotFound
100
+ else
101
+ $log.trace("GithubActivities::Crawler: UnknownType / request type: #{request[:type]}") if $log
110
102
  case request[:type]
111
103
  when TYPE_COMMIT
112
104
  fake_body = {
@@ -118,6 +110,8 @@ module Fluent
118
110
  end
119
111
  @interval_for_next_request = @default_interval
120
112
  return true
113
+ rescue StandardError => error
114
+ $log.error(error.inspect)
121
115
  end
122
116
 
123
117
  def request_uri(request)
@@ -134,35 +128,28 @@ module Fluent
134
128
  headers = {}
135
129
  if request[:previous_entity_tag]
136
130
  headers["If-None-Match"] = request[:previous_entity_tag]
137
- elsif request[:type] == TYPE_EVENTS and @positions[request[:user]]
138
- entity_tag = @positions[request[:user]]["entity_tag"]
139
- headers["If-None-Match"] = entity_tag if entity_tag
131
+ elsif request[:type] == TYPE_EVENTS
132
+ position = @users_manager.position_for(request[:user])
133
+ if position
134
+ entity_tag = position["entity_tag"]
135
+ headers["If-None-Match"] = entity_tag if entity_tag
136
+ end
140
137
  end
141
138
  headers
142
139
  end
143
140
 
144
141
  def reserve_user_events(user, options={})
145
- request = {
146
- :type => TYPE_EVENTS,
147
- :user => user,
148
- }
149
- response = options[:previous_response]
150
- if response
151
- now = options[:now] || Time.now
152
- interval = response["X-Poll-Interval"].to_i
153
- time_to_process = now.to_i + interval
154
- request[:previous_entity_tag] = response["ETag"] ||
155
- options[:previous_entity_tag]
156
- request[:process_after] = time_to_process
157
- end
142
+ request = @users_manager.new_events_request(user, options)
158
143
  @request_queue.push(request)
159
144
  end
160
145
 
161
146
  def process_user_events(user, events)
162
- last_event_timestamp = DEFAULT_LAST_EVENT_TIMESTAMP
163
- if @positions[user] and @positions[user]["last_event_timestamp"]
164
- last_event_timestamp = @positions[user]["last_event_timestamp"]
147
+ last_event_timestamp = UsersManager::DEFAULT_LAST_EVENT_TIMESTAMP
148
+ position = @users_manager.position_for(user)
149
+ if position and position["last_event_timestamp"]
150
+ last_event_timestamp = position["last_event_timestamp"]
165
151
  end
152
+
166
153
  events = events.sort do |a, b|
167
154
  b["created_at"] <=> a["created_at"]
168
155
  end
@@ -170,7 +157,7 @@ module Fluent
170
157
  timestamp = Time.parse(event["created_at"]).to_i
171
158
  next if timestamp <= last_event_timestamp
172
159
  process_user_event(user, event)
173
- save_user_position(user, :last_event_timestamp => timestamp)
160
+ @users_manager.save_position_for(user, :last_event_timestamp => timestamp)
174
161
  end
175
162
  end
176
163
 
@@ -198,6 +185,8 @@ module Fluent
198
185
  else
199
186
  emit(event["type"], event)
200
187
  end
188
+ rescue StandardError => error
189
+ $log.exception(error)
201
190
  end
202
191
 
203
192
  def process_push_event(event)
@@ -208,16 +197,16 @@ module Fluent
208
197
  return
209
198
  end
210
199
  commit_refs.reverse.each do |commit_ref|
211
- @request_queue.unshift(:type => TYPE_COMMIT,
212
- :uri => commit_ref["url"],
213
- :sha => commit_ref["sha"],
214
- :push => event)
200
+ @request_queue.push(:type => TYPE_COMMIT,
201
+ :uri => commit_ref["url"],
202
+ :sha => commit_ref["sha"],
203
+ :push => event)
215
204
  end
216
205
  # emit("push", event)
217
206
  end
218
207
 
219
208
  def process_commit(commit, push_event)
220
- $log.info("GithubActivities::Crawler: processing commit #{commit["sha"]}") if $log
209
+ $log.debug("GithubActivities::Crawler: processing commit #{commit["sha"]}") if $log
221
210
  user = commit["author"]["login"]
222
211
 
223
212
  if user and (@include_foreign_commits or watching_user?(user))
@@ -225,6 +214,7 @@ module Fluent
225
214
  if push_event["org"]
226
215
  commit[RELATED_ORGANIZATION_IMAGE_KEY] = push_event["org"]["avatar_url"]
227
216
  end
217
+ commit[RELATED_EVENT] = push_event
228
218
  emit("commit", commit)
229
219
  end
230
220
 
@@ -341,38 +331,6 @@ module Fluent
341
331
  end
342
332
  response
343
333
  end
344
-
345
- def load_positions
346
- return unless @pos_file
347
- return unless @pos_file.exist?
348
-
349
- @positions = JSON.parse(@pos_file.read)
350
- rescue
351
- @positions = {}
352
- end
353
-
354
- def save_positions
355
- return unless @pos_file
356
- SafeFileWriter.write(@pos_file, JSON.pretty_generate(@positions))
357
- end
358
-
359
- def save_user_position(user, params)
360
- @positions[user] ||= {}
361
-
362
- if params[:entity_tag]
363
- @positions[user]["entity_tag"] = params[:entity_tag]
364
- end
365
-
366
- if params[:last_event_timestamp] and
367
- params[:last_event_timestamp] != DEFAULT_LAST_EVENT_TIMESTAMP
368
- old_timestamp = @positions[user]["last_event_timestamp"]
369
- if old_timestamp.nil? or old_timestamp < params[:last_event_timestamp]
370
- @positions[user]["last_event_timestamp"] = params[:last_event_timestamp]
371
- end
372
- end
373
-
374
- save_positions
375
- end
376
334
  end
377
335
  end
378
336
  end
@@ -0,0 +1,103 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # This file is part of fluent-plugin-github-activities.
4
+ #
5
+ # fluent-plugin-github-activities is free software: you can
6
+ # redistribute it and/or modify it under the terms of the GNU Lesser
7
+ # General Public License as published by the Free Software
8
+ # Foundation, either version 3 of the License, or (at your option)
9
+ # any later version.
10
+ #
11
+ # fluent-plugin-github-activities is distributed in the hope that
12
+ # it will be useful, but WITHOUT ANY WARRANTY; without even the
13
+ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14
+ # PURPOSE. See the GNU Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with fluent-plugin-github-activities. If not, see
18
+ # <http://www.gnu.org/licenses/>.
19
+
20
+ require "pathname"
21
+ require "json"
22
+
23
+ require "fluent/plugin/github-activities/safe_file_writer"
24
+
25
+ module Fluent
26
+ module GithubActivities
27
+ class UsersManager
28
+ DEFAULT_LAST_EVENT_TIMESTAMP = -1
29
+
30
+ def initialize(params={})
31
+ @users = params[:users]
32
+
33
+ @positions = {}
34
+ @pos_file = params[:pos_file]
35
+ @pos_file = Pathname(@pos_file) if @pos_file
36
+ end
37
+
38
+ def generate_initial_requests
39
+ @users.collect do |user|
40
+ new_events_request(user)
41
+ end
42
+ end
43
+
44
+ def new_events_request(user, options={})
45
+ request = {
46
+ :type => TYPE_EVENTS,
47
+ :user => user,
48
+ }
49
+ response = options[:previous_response]
50
+ if response
51
+ now = options[:now] || Time.now
52
+ interval = response["X-Poll-Interval"].to_i
53
+ time_to_process = now.to_i + interval
54
+ request[:previous_entity_tag] = response["ETag"] ||
55
+ options[:previous_entity_tag]
56
+ request[:process_after] = time_to_process
57
+ else
58
+ request[:previous_entity_tag] = options[:previous_entity_tag]
59
+ end
60
+ request
61
+ end
62
+
63
+ def position_for(user)
64
+ load_positions
65
+ @positions[user]
66
+ end
67
+
68
+ def save_position_for(user, params)
69
+ load_positions
70
+ @positions[user] ||= {}
71
+
72
+ if params[:entity_tag]
73
+ @positions[user]["entity_tag"] = params[:entity_tag]
74
+ end
75
+
76
+ if params[:last_event_timestamp] and
77
+ params[:last_event_timestamp] != DEFAULT_LAST_EVENT_TIMESTAMP
78
+ old_timestamp = @positions[user]["last_event_timestamp"]
79
+ if old_timestamp.nil? or old_timestamp < params[:last_event_timestamp]
80
+ @positions[user]["last_event_timestamp"] = params[:last_event_timestamp]
81
+ end
82
+ end
83
+
84
+ save_positions
85
+ end
86
+
87
+ private
88
+ def load_positions
89
+ return unless @pos_file
90
+ return unless @pos_file.exist?
91
+
92
+ @positions = JSON.parse(@pos_file.read)
93
+ rescue
94
+ @positions = {}
95
+ end
96
+
97
+ def save_positions
98
+ return unless @pos_file
99
+ SafeFileWriter.write(@pos_file, JSON.pretty_generate(@positions))
100
+ end
101
+ end
102
+ end
103
+ end
@@ -20,6 +20,7 @@
20
20
  module Fluent
21
21
  class GithubActivitiesInput < Input
22
22
  DEFAULT_BASE_TAG = "github-activity"
23
+ DEFAULT_CLIENTS = 4
23
24
 
24
25
  Plugin.register_input("github-activities", self)
25
26
 
@@ -30,6 +31,7 @@ module Fluent
30
31
  config_param :include_foreign_commits, :bool, :default => false
31
32
  config_param :base_tag, :string, :default => DEFAULT_BASE_TAG
32
33
  config_param :pos_file, :string, :default => nil
34
+ config_param :clients, :integer, :default => DEFAULT_CLIENTS
33
35
  config_param :interval, :integer, :default => 1
34
36
 
35
37
  def initialize
@@ -42,29 +44,49 @@ module Fluent
42
44
 
43
45
  def start
44
46
  @base_tag = @base_tag.sub(/\.\z/, "")
45
- @thread = Thread.new do
46
- crawler_options = {
47
- :access_token => @access_token,
48
- :watching_users => prepare_users_list,
49
- :include_commits_from_pull_request => @include_commits_from_pull_request,
50
- :include_foreign_commits => @include_foreign_commits,
51
- :pos_file => @pos_file,
52
- :default_interval => @interval,
53
- }
54
- @crawler = ::Fluent::GithubActivities::Crawler.new(crawler_options)
55
- @crawler.on_emit = lambda do |tag, record|
56
- Engine.emit("#{@base_tag}.#{tag}", Engine.now, record)
57
- end
58
47
 
59
- loop do
60
- @crawler.process_request
61
- sleep(@crawler.interval_for_next_request)
48
+ users = prepare_users_list
49
+ n_clients = [@clients, users.size].min
50
+ @interval = @interval * n_clients
51
+
52
+ @client_threads = []
53
+ @request_queue = Queue.new
54
+
55
+ users_manager_params = {
56
+ :users => users,
57
+ :pos_file => @pos_file,
58
+ }
59
+ users_manager = ::Fluent::GithubActivities::UsersManager.new(users_manager_params)
60
+ users_manager.generate_initial_requests.each do |request|
61
+ @request_queue.push(request)
62
+ end
63
+
64
+ n_clients.times do
65
+ @client_threads << Thread.new do
66
+ crawler_options = {
67
+ :access_token => @access_token,
68
+ :watching_users => users,
69
+ :include_commits_from_pull_request => @include_commits_from_pull_request,
70
+ :include_foreign_commits => @include_foreign_commits,
71
+ :pos_file => @pos_file,
72
+ :request_queue => @request_queue,
73
+ :default_interval => @interval,
74
+ }
75
+ crawler = ::Fluent::GithubActivities::Crawler.new(crawler_options)
76
+ crawler.on_emit = lambda do |tag, record|
77
+ Engine.emit("#{@base_tag}.#{tag}", Engine.now, record)
78
+ end
79
+
80
+ loop do
81
+ crawler.process_request
82
+ sleep(crawler.interval_for_next_request)
83
+ end
62
84
  end
63
85
  end
64
86
  end
65
87
 
66
88
  def shutdown
67
- @thread.exit
89
+ @client_threads.each(&:exit)
68
90
  end
69
91
 
70
92
  private
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-github-activities
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - YUKI Hiroshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-02 00:00:00.000000000 Z
11
+ date: 2015-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description: This provides ability you fluentd to crawl public activities of users.
97
+ description: This provides ability to crawl public activities of users.
98
98
  email:
99
99
  - yuki@clear-code.com
100
100
  executables: []
@@ -107,6 +107,7 @@ files:
107
107
  - lib/fluent/plugin/github-activities.rb
108
108
  - lib/fluent/plugin/github-activities/crawler.rb
109
109
  - lib/fluent/plugin/github-activities/safe_file_writer.rb
110
+ - lib/fluent/plugin/github-activities/users_manager.rb
110
111
  - lib/fluent/plugin/in_github-activities.rb
111
112
  - test/fixture.rb
112
113
  - test/fixture/accept-pull-request-event.json