ghtorrent 0.10 → 0.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +16 -0
  3. data/Gemfile.lock +12 -27
  4. data/README.md +20 -33
  5. data/Rakefile +1 -9
  6. data/bin/ght-log-analyzer +11 -6
  7. data/bin/ght-log-influx +190 -0
  8. data/bin/ght-queue-grep.rb +55 -0
  9. data/bin/ght-retrieve-users +6 -0
  10. data/bin/{ght-rm-dupl → ght-update-repo} +1 -1
  11. data/lib/ghtorrent.rb +4 -4
  12. data/lib/ghtorrent/adapters/base_adapter.rb +4 -11
  13. data/lib/ghtorrent/adapters/mongo_persister.rb +5 -9
  14. data/lib/ghtorrent/adapters/noop_persister.rb +0 -5
  15. data/lib/ghtorrent/api_client.rb +45 -119
  16. data/lib/ghtorrent/command.rb +25 -8
  17. data/lib/ghtorrent/commands/full_user_retriever.rb +50 -0
  18. data/lib/ghtorrent/commands/ght_data_retrieval.rb +12 -98
  19. data/lib/ghtorrent/commands/ght_get_more_commits.rb +13 -17
  20. data/lib/ghtorrent/commands/ght_load.rb +1 -2
  21. data/lib/ghtorrent/commands/ght_mirror_events.rb +8 -12
  22. data/lib/ghtorrent/commands/ght_retrieve_dependents.rb +0 -5
  23. data/lib/ghtorrent/commands/ght_retrieve_one.rb +1 -6
  24. data/lib/ghtorrent/commands/ght_retrieve_repo.rb +56 -26
  25. data/lib/ghtorrent/commands/ght_retrieve_repos.rb +5 -15
  26. data/lib/ghtorrent/commands/ght_retrieve_user.rb +13 -54
  27. data/lib/ghtorrent/commands/ght_retrieve_users.rb +49 -0
  28. data/lib/ghtorrent/commands/ght_update_repo.rb +126 -0
  29. data/lib/ghtorrent/event_processing.rb +140 -0
  30. data/lib/ghtorrent/ghtorrent.rb +330 -396
  31. data/lib/ghtorrent/logging.rb +65 -12
  32. data/lib/ghtorrent/migrations/014_add_deleted_to_projects.rb +1 -1
  33. data/lib/ghtorrent/migrations/019_add_fake_to_users.rb +1 -1
  34. data/lib/ghtorrent/migrations/020_add_deleted_to_users.rb +19 -0
  35. data/lib/ghtorrent/migrations/021_remove_ext_ref_id.rb +42 -0
  36. data/lib/ghtorrent/migrations/022_add_project_languages.rb +24 -0
  37. data/lib/ghtorrent/multiprocess_queue_client.rb +25 -5
  38. data/lib/ghtorrent/retriever.rb +100 -57
  39. data/lib/ghtorrent/settings.rb +14 -17
  40. data/lib/ghtorrent/{transacted_ghtorrent.rb → transacted_gh_torrent.rb} +28 -5
  41. data/lib/version.rb +1 -1
  42. metadata +14 -46
  43. data/bin/ght-process-event +0 -35
  44. data/lib/ghtorrent/cache.rb +0 -97
  45. data/lib/ghtorrent/commands/ght_rm_dupl.rb +0 -132
  46. data/lib/ghtorrent/gh_torrent_exception.rb +0 -6
  47. data/spec/api_client_spec.rb +0 -42
  48. data/spec/spec_helper.rb +0 -21
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1394c5a3869bab8d02cf61fc8108ca1f10afc0b8
4
- data.tar.gz: 9d82ecf68d0316db32f962af8cb7d0070dd74c82
3
+ metadata.gz: 68b11841a8dfbd0418723fce0620a5d625b4cca1
4
+ data.tar.gz: d39f30596d257cfe5cb365e1365388169e50cc18
5
5
  SHA512:
6
- metadata.gz: 7dbfe453c542137cafafa641c83bb680749e946c86193447e2b6aee829b88370bc66e90a8ea3d64a33d3c0061a7c5b01fa292ee96b94fba6dcca910d472d0f7f
7
- data.tar.gz: d5c6487264b3a4d17af08e33ac2babd17b63ca6849e1ebd694a1d6b639797e8e870664f7dcde79718d61350551971a0c4b99b5d0ec56923b31984227c69b49bc
6
+ metadata.gz: 6443373ff38703c8113c23716db591a1574eb7f2e36eba1ae68d78ce322b20e42f1c0c840c3a113bb870b7c8b5aae817cb07a60f46d6bccb07ed4058cabc23d2
7
+ data.tar.gz: cfad88e464fad602f38f6f4cfb7963a0b7ac28f5c205ddf954939937c398d513e004725ef70e920b682b06239c2adc6663c3a29f6eea90913aad3611c9c2a310
data/CHANGELOG CHANGED
@@ -1,3 +1,19 @@
1
+ = Version 0.11
2
+ * Retrieve members by processing MemberEvents to counter API change
3
+ * Removed the request caching layer. Requests are cached in the persister
4
+ * Change default DB isolation to REPEATABLE READ for stronger isolation
5
+ * Finer-grained (commit level) transactions when processing forks
6
+ * More accurate and uniform logging
7
+ * Tool to push logs to InfluxDB for monitoring
8
+ * Drop ext_ref_id from all tables
9
+ * More efficient retrieval of events, 100 in one go
10
+ * Tool to retrieve all user details and support for marking users deleted
11
+ * Support for retrieving repo events when using ght-retrieve-repo
12
+ * Non-recursive retrieval of pull requests leads to 1/3 API calls
13
+ * Custom rate limits for Github API tokens
14
+ * Tooling for MySQL dumps in CSV files
15
+ * General bug fixes and cleanups
16
+
1
17
  = Version 0.10
2
18
  * Base class for multiprocess queue clients
3
19
  * Make retrieval of pull request commits faster
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ghtorrent (0.9)
4
+ ghtorrent (0.11)
5
5
  bson_ext (~> 1.9, >= 1.9.0)
6
6
  bunny (~> 1.0, >= 1.0.0)
7
7
  mongo (~> 1.9, >= 1.9.0)
@@ -11,32 +11,16 @@ PATH
11
11
  GEM
12
12
  remote: https://rubygems.org/
13
13
  specs:
14
- addressable (2.3.5)
15
14
  amq-protocol (1.9.2)
16
- bson (1.10.0)
17
- bson_ext (1.10.0)
18
- bson (~> 1.10.0)
19
- bunny (1.2.1)
15
+ bson (1.12.2)
16
+ bson_ext (1.12.2)
17
+ bson (~> 1.12.2)
18
+ bunny (1.3.1)
20
19
  amq-protocol (>= 1.9.2)
21
- crack (0.4.1)
22
- safe_yaml (~> 0.9.0)
23
- diff-lcs (1.2.5)
24
- mongo (1.10.0)
25
- bson (~> 1.10.0)
26
- rspec (2.14.1)
27
- rspec-core (~> 2.14.0)
28
- rspec-expectations (~> 2.14.0)
29
- rspec-mocks (~> 2.14.0)
30
- rspec-core (2.14.7)
31
- rspec-expectations (2.14.4)
32
- diff-lcs (>= 1.1.3, < 2.0)
33
- rspec-mocks (2.14.4)
34
- safe_yaml (0.9.7)
35
- sequel (4.10.0)
36
- trollop (2.0)
37
- webmock (1.16.0)
38
- addressable (>= 2.2.7)
39
- crack (>= 0.3.2)
20
+ mongo (1.12.2)
21
+ bson (= 1.12.2)
22
+ sequel (4.23.0)
23
+ trollop (2.1.2)
40
24
 
41
25
  PLATFORMS
42
26
  ruby
@@ -44,5 +28,6 @@ PLATFORMS
44
28
  DEPENDENCIES
45
29
  ghtorrent!
46
30
  jdbc-mysql
47
- rspec (~> 2.14, >= 2.14.0)
48
- webmock (~> 1.16)
31
+
32
+ BUNDLED WITH
33
+ 1.10.2
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # ghtorrent: Mirror and process data from the Github API
1
+ # ghtorrent: Mirror and index data from the Github API
2
2
 
3
3
  A library and a collection of scripts used to retrieve data from the Github API
4
4
  and extract metadata in an SQL database, in a modular and scalable manner. The
@@ -10,8 +10,7 @@ GHTorrent can be used for a variety of purposes, such as:
10
10
  * Mirror the Github API event stream and follow links from events to actual data
11
11
  to gradually build a [Github index](http://ghtorrent.org/)
12
12
  * Create a queriable metadata index for a specific repository
13
- * Query the Github API using intelligent caching to avoid duplicate queries
14
-
13
+ * Construct a data source for [extracting process analytics](http://www.gousios.gr/blog/ghtorrent-project-statistics/) (see for example [those](http://ghtorrent.org/pullreq-perf/)) for one or more repositories
15
14
 
16
15
  ## Components
17
16
 
@@ -19,8 +18,8 @@ GHTorrents components (which can be used individually) are:
19
18
 
20
19
  * [APIClient](https://github.com/gousiosg/github-mirror/blob/master/lib/ghtorrent/api_client.rb): Knows how to query the Github API (both single entities and
21
20
  pages) and respect the API request limit. Can be configured to override the
22
- default IP address, in case of multihomed hosts. Uses configurable on disk [caching](https://github.com/gousiosg/github-mirror/blob/master/lib/ghtorrent/cache.rb) to avoid retrieving data that do not change.
23
- * [Retriever](https://github.com/gousiosg/github-mirror/blob/master/lib/ghtorrent/retriever.rb): Knows how to retrieve specific Github entities (users, repositories, watchers) by name. Uses an optional persister to avoid
21
+ default IP address, in case of multihomed hosts.
22
+ * [Retriever](https://github.com/gousiosg/github-mirror/blob/master/lib/ghtorrent/retriever.rb): Knows how to retrieve specific Github entities (users, repositories, watchers) by name. Uses an optional persister to avoid
24
23
  retrieving data that have not changed.
25
24
  * [Persister](https://github.com/gousiosg/github-mirror/blob/master/lib/ghtorrent/persister.rb): A key/value store, which can be backed by a real key/value store,
26
25
  to store Github JSON replies and query them on request. The backing key/value
@@ -28,25 +27,23 @@ store must support arbitrary queries to the stored JSON objects.
28
27
  * [GHTorrent](https://github.com/gousiosg/github-mirror/blob/master/lib/ghtorrent/ghtorrent.rb): Knows how to extract information from the data retrieved by
29
28
  the retriever in order to update an SQL database (see [schema](http://ghtorrent.org/relational.html)) with metadata.
30
29
 
31
- ### Component Configuration
30
+ ### Component Configuration
32
31
 
33
32
  The Persister and GHTorrent components have configurable back ends:
34
33
 
35
34
  * **Persister:** Either uses MongoDB > 2.0 (`mongo` driver) or no persister (`noop` driver)
36
- * **GHTorrent:** GHTorrent is tested mainly with MySQL, but can theoretically be
37
- used with any SQL database compatible with [Sequel](http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html). Your milaege may vary.
38
-
39
- The distributed mirroring scripts also require RabbitMQ >= 2.8 or other
35
+ * **GHTorrent:** GHTorrent is tested mainly with MySQL and SQLite, but can theoretically be used with any SQL database compatible with [Sequel](http://sequel.rubyforge.org/rdoc/files/doc/opening_databases_rdoc.html). Your mileage may vary.
40
36
 
37
+ For distributed mirroring you also need RabbitMQ >= 3
41
38
 
42
39
  ## Installation
43
40
 
44
41
 
45
42
  ### 1. Install GHTorrent
46
- GHTorrent is written in Ruby (tested with 1.9). To install it as a Gem do:
43
+ GHTorrent is written in Ruby (tested with 2.0). To install it as a Gem do:
47
44
 
48
45
  <code>
49
- sudo gem install ghtorrent
46
+ sudo gem install ghtorrent
50
47
  </code>
51
48
 
52
49
 
@@ -56,14 +53,14 @@ Depending on which SQL database you want to use, install the appropriate
56
53
  dependency gem.
57
54
 
58
55
  <code>
59
- sudo gem install mysql2 # or <sqlite3-ruby|postgres>
56
+ sudo gem install mysql2 # or sqlite3
60
57
  </code>
61
58
 
62
59
 
63
60
  ## Configuration
64
61
 
65
62
  Copy [config.yaml.tmpl](https://github.com/gousiosg/github-mirror/blob/master/config.yaml.tmpl)
66
- to a file in your home directory.
63
+ to a file in your home directory.
67
64
 
68
65
  All provided scripts accept the `-c` option, which accepts the location of the configuration file as
69
66
  a parameter.
@@ -74,7 +71,7 @@ to retrieve data in parallel on the [Wiki](https://github.com/gousiosg/github-mi
74
71
 
75
72
  ## Using GHTorrent
76
73
 
77
- To mirror the event stream and capture all data:
74
+ To mirror the event stream and capture all data:
78
75
 
79
76
  * `ght-mirror-events.rb` periodically polls Github's event
80
77
  queue (`https://api.github.com/events`), stores all new events in the
@@ -85,7 +82,7 @@ RabbitMQ.
85
82
  functions. The functions use the appropriate Github API call to retrieve the
86
83
  linked contents, extract metadata (for database storage), and store the
87
84
  retrieved data in the appropriate collection in the persister, to avoid
88
- duplicate API calls.
85
+ duplicate API calls.
89
86
  Data in the SQL database contain pointers (the `ext_ref_id` field) to the
90
87
  "raw" data in the persister.
91
88
 
@@ -98,32 +95,29 @@ To perform maintenance:
98
95
 
99
96
  * `ght-load` loads selected events from the persister to the queue in order for
100
97
  the `ght-data-retrieval` script to reprocess them
101
- * `ght-get-more-commits` retrieves all commits for a specific repository
102
-
103
98
 
104
- ### Data Torrents
99
+ ### Data
105
100
 
106
- You can find torrents for retrieving data on the
107
- [Available Torrents](https://ghtorrent.org/downloads.html) page.
101
+ The code in this repository is used to power the data collection process of
102
+ the [GHTorrent.org](http://ghtorrent.org/) project.
103
+ You can find all data collected by the project on the
104
+ [Downloads](https://ghtorrent.org/downloads.html) page.
108
105
 
109
106
  There are two sets of data:
110
107
 
111
108
  * **Raw events:** Github's [event stream](https://api.github.com/events). These
112
109
  are the roots for mirroring operations. The `ght-data-retrieval` crawler starts
113
110
  from an event and goes deep into the rabbit hole.
114
- * **SQL dumps + Linked data:** Data dumps from the SQL database and the corresponding
115
- MongoDB entities.
116
-
111
+ * **SQL dumps + Linked data:** Data dumps from the SQL database and the corresponding MongoDB entities.
117
112
 
118
113
  ## Bugs & Feature Requests
119
114
 
120
- Please tell us about features you'd like or bugs you've discovered on our
115
+ Please tell us about features you'd like or bugs you've discovered on our
121
116
  [Issue Tracker](https://github.com/gousiosg/github-mirror/issues).
122
117
 
123
118
  Patches, bug fixes, etc are welcome. Please fork the repository and create
124
119
  a pull request when done fixing/implementing the new feature.
125
120
 
126
-
127
121
  ## Citing GHTorrent in your Research
128
122
 
129
123
  If you find GHTorrent and the accompanying datasets useful in your research,
@@ -131,18 +125,11 @@ please consider citing the following paper:
131
125
 
132
126
  > Georgios Gousios and Diomidis Spinellis, "GHTorrent: GitHub’s data from a firehose," in _MSR '12: Proceedings of the 9th Working Conference on Mining Software Repositories_, June 2–3, 2012, Zurich, Switzerland.
133
127
 
134
- See also the following presentation:
135
-
136
- <iframe src="http://www.slideshare.net/slideshow/embed_code/13184524?rel=0" width="342" height="291" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" style="border:1px solid #CCC;border-width:1px 1px 0;margin-bottom:5px" allowfullscreen />
137
- <div style="margin-bottom:5px"> <strong> <a href="http://www.slideshare.net/gousiosg/ghtorrent-githubs-data-from-a-firehose-13184524" title="GHTorrent: Github&#39;s Data from a Firehose" target="_blank">GHTorrent: Github&#39;s Data from a Firehose</a> </strong> </div>
138
-
139
-
140
128
  ## Authors
141
129
 
142
130
  * [Georgios Gousios](http://istlab.dmst.aueb.gr/~george) <gousiosg@gmail.com>
143
131
  * [Diomidis Spinellis](http://www.dmst.aueb.gr/dds) <dds@aueb.gr>
144
132
 
145
-
146
133
  ## License
147
134
 
148
135
  [2-clause BSD](http://www.opensource.org/licenses/bsd-license.php)
data/Rakefile CHANGED
@@ -1,15 +1,7 @@
1
1
  require 'rake'
2
- require 'rake/testtask'
3
2
  require 'rake/rdoctask'
4
3
 
5
- task :default => [:spec, :rdoc]
6
-
7
- desc "Run basic tests"
8
- Rake::TestTask.new(:spec) do |t|
9
- t.pattern = 'spec/*_test.rb'
10
- t.verbose = true
11
- t.warning = true
12
- end
4
+ task :default => [:rdoc]
13
5
 
14
6
  desc "Run Rdoc"
15
7
  Rake::RDocTask.new(:rdoc) do |rd|
@@ -9,7 +9,7 @@ Thread.new do
9
9
  puts "Collecting data..."
10
10
  while (true) do
11
11
  sleep(1)
12
- system "clear" or system "cls"
12
+ system 'clear' or system 'cls'
13
13
 
14
14
  stats.each do |k,v|
15
15
  unless v[:time_in].nil?
@@ -68,7 +68,7 @@ end
68
68
 
69
69
  ARGF.each do |x|
70
70
 
71
- next unless x =~ /APIClient/
71
+ next unless x =~ /api_client.rb/
72
72
 
73
73
  if x =~ /sleeping/
74
74
  ts, pid, remaining = x.match(/\[([^.]+).*#([0-9]+)\].*for ([0-9]+).*/).captures
@@ -82,15 +82,20 @@ ARGF.each do |x|
82
82
  end
83
83
 
84
84
  elsif x =~ /Not Found|Gone|Conflict/
85
- pid = x.match(/.*#([0-9]+).*APIClient.*/).captures[0]
85
+ pid = x.match(/.*#([0-9]+).*api_client.rb.*/).captures[0]
86
86
  if stats[pid][:not_found].nil?
87
87
  stats[pid][:not_found] = 0
88
- end
89
- stats[pid][:not_found] += 1
88
+ end
89
+ stats[pid][:not_found] += 1
90
+ elsif x =~ /Forbidden/
91
+ if stats[pid][:forbidden].nil?
92
+ stats[pid][:forbidden] = 0
93
+ end
94
+ stats[pid][:forbidden] += 1
90
95
  else
91
96
  begin
92
97
  ts, pid, ip, url, remaining, time =
93
- x.match(/.*\[([^.]+).*#([0-9]+)\].*APIClient\[(.*)\].*(https:\/\/.*) \(([0-9]+) remaining\).* ([0-9]+) ms$/).captures
98
+ x.match(/.*\[([^.]+).*#([0-9]+)\].*api_client.rb: \[(.*)\].*(https:\/\/.*) \(([0-9]+) remaining\).* ([0-9]+) ms$/).captures
94
99
  rescue
95
100
  puts x
96
101
  next
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+
4
+ require 'rubygems'
5
+ require 'trollop'
6
+ require 'influxdb'
7
+ require 'pp'
8
+ require 'time'
9
+
10
+ def parse_api_client_line(line)
11
+ if line.start_with?("Successful")
12
+ # Successful request. URL: https://api.github.com/repos/amizony/self-destructing-task-list/comments/11518274?per_page=100, Remaining: 3949, Total: 423 ms
13
+ remaining, elapsed = line.match(/.*Remaining: ([\d]+), Total: ([\d]+) ms/).captures
14
+ {
15
+ :outcome => "success",
16
+ :elapsed => elapsed.to_i,
17
+ :remaining => remaining.to_i
18
+ }
19
+ elsif line.start_with?("Failed")
20
+ # Failed request. URL: https://api.github.com/repos/mingliang7/hotel/commits?per_page=100, Status code: 409, Status: Conflict, Access: ghtorrent, IP: 0.0.0.0, Remaining: 3332
21
+ code, elapsed = line.match(/.*Status code: ([^,]+), .*Remaining: ([\d]+)/).captures
22
+ {
23
+ :outcome => "error",
24
+ :error_code => code.to_i,
25
+ :remaining => remaining.to_i
26
+ }
27
+ else
28
+ {}
29
+ end
30
+ end
31
+
32
+ def parse_data_retrieval_line(line)
33
+ #Success processing event. Type: PushEvent, ID: 2863181313, Time: 967 ms
34
+ return {} unless line.start_with?("Success") or line.start_with?("Error")
35
+ outcome, evt_type, time = line.match(/([^\ ]+) processing event\. Type: ([\D]+)Event, .*, Time: ([\d]+) ms/).captures
36
+
37
+ {
38
+ :outcome => outcome.downcase,
39
+ :evt_type => evt_type,
40
+ :elapsed => time.to_i
41
+ }
42
+ end
43
+
44
+ def parse_retriever_line(line)
45
+
46
+ if line.start_with?("Added")
47
+ # Added repo hiropong -> googlemaplesson
48
+ outcome = "success"
49
+ entity = line.split(/ /)[1]
50
+ elsif line.start_with?("Could not find")
51
+ # Could not find commit_comment 12106552. Deleted?
52
+ outcome = "failure"
53
+ entity = line.split(/ /)[3]
54
+ else
55
+ return {}
56
+ end
57
+
58
+ {
59
+ :outcome => outcome,
60
+ :entity => entity
61
+ }
62
+
63
+ end
64
+
65
+ def parse_ghtorrent_line(line)
66
+
67
+ if line.start_with?("Added")
68
+ # Added user hayjohnny2000
69
+ # Added issue_event etsy/logster -> 1/etsy/logster -> 1/417355
70
+ outcome = "success"
71
+ entity = line.split(/ /)[1]
72
+ elsif line.start_with?("Could not retrieve")
73
+ # Could not retrieve commit_comment 12106552. Deleted?
74
+ outcome = "failure"
75
+ entity = line.split(/ /)[3]
76
+ else
77
+ return {}
78
+ end
79
+
80
+ {
81
+ :outcome => outcome,
82
+ :entity => entity
83
+ }
84
+
85
+ end
86
+
87
+ def parse_log_line(line)
88
+ begin
89
+ severity, time, progname, stage, msg =
90
+ line.match(/([A-Z]+), (.+), (.+) -- ([^:]*?): (.*)/).captures
91
+ rescue
92
+ puts "Error parsing line: #{line}"
93
+ return {}
94
+ end
95
+
96
+ return {} if severity.downcase == 'debug'
97
+ stage = stage.split(/\./)[0]
98
+ data = {
99
+ :time => Time.iso8601(time).to_f,
100
+ :client => progname,
101
+ :severity => severity,
102
+ :stage => stage
103
+ }
104
+
105
+ return {} if msg.nil? or msg.length == 0
106
+
107
+
108
+ stage_specific =
109
+ begin
110
+ case stage
111
+ when 'api_client'
112
+ parse_api_client_line(msg)
113
+ when 'ght_data_retrieval'
114
+ parse_data_retrieval_line(msg)
115
+ when 'retriever'
116
+ parse_retriever_line(msg)
117
+ when 'ghtorrent'
118
+ parse_ghtorrent_line(msg)
119
+ else
120
+ {}
121
+ end
122
+ rescue
123
+ puts "Error parsing line: #{msg}"
124
+ {}
125
+ end
126
+
127
+ return {} if stage_specific.empty?
128
+ data.merge(stage_specific)
129
+ end
130
+
131
+ opts = Trollop::options do
132
+ banner <<-END
133
+ Store GHTorrent log output to InfluxDB. By default reads from STDIN.
134
+ Can be configured to watch files in directories.
135
+
136
+ Options:
137
+ END
138
+
139
+ opt :watch, "Use watch mode", :sort => 'w'
140
+ opt :watch_pattern, "Pattern for files to watch",
141
+ :short => 'p', :default => '*.log'
142
+
143
+ opt :db_server, "InfluxDB server to use", :type => String,
144
+ :short => 's', :default => '127.0.0.1'
145
+ opt :database, "InfluxDB database to use", :type => String,
146
+ :short => 'd', :default => 'ghtorrent'
147
+ opt :db_uname, "Username for the Influx database", :type => String,
148
+ :short => 'u', :default => 'ghtorrent'
149
+ opt :db_passwd, "Password for the Influx database", :type => String,
150
+ :short => 'x', :default => ''
151
+ end
152
+
153
+ unless opts[:db_passwd_given].nil?
154
+ influx = InfluxDB::Client.new(opts[:database],
155
+ :host => opts[:db_server],
156
+ :username => opts[:db_uname],
157
+ :password => opts[:db_passwd])
158
+ else
159
+ influx = InfluxDB::Client.new("ghtorrent",
160
+ :host => opts[:db_server])
161
+ end
162
+ influx.get_database_list
163
+
164
+ if opts[:watch]
165
+ require 'filewatch/tail'
166
+
167
+ t = FileWatch::Tail.new
168
+ t.tail(opts[:watch_pattern])
169
+
170
+ t.subscribe do |path, line|
171
+ p = parse_log_line(line)
172
+ next if p.empty?
173
+
174
+ pp p
175
+ #influx.write_point(p[:stage], p)
176
+ end
177
+ else
178
+ puts "Reading from STDIN..."
179
+ ARGF.each do |line|
180
+ next if line !~ /^[IDEW]/
181
+
182
+ begin
183
+ p = parse_log_line(line)
184
+ next if p.empty?
185
+ pp p
186
+ influx.write_point(p[:stage], p)
187
+ rescue
188
+ end
189
+ end
190
+ end