mines 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.yardopts +6 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +37 -0
- data/README.md +25 -0
- data/Rakefile +8 -0
- data/bin/mines +8 -8
- data/config/application.rb +11 -0
- data/config/help.rb +13 -0
- data/examples/README.md +4 -0
- data/examples/hashtags/Gemfile +24 -0
- data/examples/hashtags/Gemfile.lock +81 -0
- data/examples/hashtags/README.md +22 -0
- data/examples/hashtags/config/application.rb +30 -0
- data/examples/hashtags/miners/metrics.rb +157 -0
- data/examples/hashtags/miners/process.rb +46 -0
- data/examples/hashtags/miners/publicstream.rb +108 -0
- data/examples/hashtags/spec/metrics_spec.rb +23 -0
- data/examples/hashtags/spec/process_spec.rb +23 -0
- data/examples/hashtags/spec/publicstream_spec.rb +23 -0
- data/lib/generators/README.md +13 -0
- data/lib/generators/application.rb +3 -2
- data/lib/generators/miner.rb +10 -2
- data/lib/generators/templates/Gemfile.erb +24 -0
- data/lib/generators/templates/README.md +24 -0
- data/lib/generators/templates/application_config.erb +11 -0
- data/lib/generators/templates/metrics_miner.erb +28 -0
- data/lib/generators/templates/miner_spec.erb +23 -0
- data/lib/generators/templates/network_miner.erb +32 -0
- data/lib/generators/templates/process_miner.erb +26 -1
- data/lib/generators/templates/twitter_miner.erb +111 -0
- data/lib/logging.rb +10 -3
- data/mines.gemspec +33 -0
- data/spec/logging_spec.rb +19 -0
- data/spec/mines_spec.rb +19 -0
- data/spec/redis_store_spec.rb +64 -0
- data/spec/utilities_spec.rb +26 -0
- metadata +45 -11
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/bin/ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :process
|
7
|
+
|
8
|
+
@log.info "Process miner pid: #{Process.pid}"
|
9
|
+
|
10
|
+
## Connect to an Object Queue
|
11
|
+
## elements are pushed to this queue by the network miner
|
12
|
+
queue = ObjectQueue.new "publicstream"
|
13
|
+
|
14
|
+
counter = Counter.new "original"
|
15
|
+
|
16
|
+
channel = MessageChannel.new("web") # example channel
|
17
|
+
|
18
|
+
## Create a timeseries metrics store to
|
19
|
+
## save the occurrence rate for each tag for each minute
|
20
|
+
ts = TimeSeries.new("hashtags")
|
21
|
+
|
22
|
+
## Create a timeseries metrics store to
|
23
|
+
## save the total number of tweets each minute
|
24
|
+
tweets_rate = TimeSeries.new("tweets_rate")
|
25
|
+
|
26
|
+
## Start the process loop
|
27
|
+
loop {
|
28
|
+
## Get an element from the queue
|
29
|
+
status = queue.pop
|
30
|
+
tweets_rate.push "all"
|
31
|
+
|
32
|
+
## Filter out Retweets and Replies
|
33
|
+
unless status.text =~ /^RT/ or
|
34
|
+
status.retweet_count > 0 or
|
35
|
+
status.text =~ /^@/
|
36
|
+
|
37
|
+
print_tweet(status.user.screen_name, status.text)
|
38
|
+
|
39
|
+
status.hashtags.each do |hashtag|
|
40
|
+
ts.push (hashtag.text)
|
41
|
+
end
|
42
|
+
counter.incr
|
43
|
+
channel.publish({:user => status.user.screen_name, :text => status.text}.to_json)
|
44
|
+
#web.publish({:user => status.user.screen_name, :text => status.text.gsub(/(https?:\/\/\S+)/, '<a href="\1" target="_blank">\1</a> ').to_s}.to_json)
|
45
|
+
end
|
46
|
+
} # end loop
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :network
|
7
|
+
|
8
|
+
## Create an Object Queue
|
9
|
+
queue = ObjectQueue.new "publicstream"
|
10
|
+
|
11
|
+
## Provide your Twitter Streaming API keys and credentials
|
12
|
+
##
|
13
|
+
## example:
|
14
|
+
# TweetStream.configure do |config|
|
15
|
+
# config.consumer_key = "mZ0gsfwgA"
|
16
|
+
# config.consumer_secret = "nqwQqELrvdb6C"
|
17
|
+
# config.oauth_token = "OfIUtfTmP6NQuwWKlH"
|
18
|
+
# config.oauth_token_secret = 'u4bFMpQxfgwZl'
|
19
|
+
# config.auth_method = :oauth
|
20
|
+
# end
|
21
|
+
|
22
|
+
## It is recommended to provide your credentials
|
23
|
+
## in a different file outside version control system
|
24
|
+
## to avoid publishing them.
|
25
|
+
## example:
|
26
|
+
require_relative '../../twitter_credentials'
|
27
|
+
|
28
|
+
## Initialize TweetStream Client
|
29
|
+
client = TweetStream::Client.new
|
30
|
+
|
31
|
+
## Print custom messages when client receives special event messages
|
32
|
+
## See also: https://dev.twitter.com/docs/streaming-apis/messages
|
33
|
+
client.on_error do |message|
|
34
|
+
msg = "\nError message: #{message}\n"+
|
35
|
+
"An HTTP error is encountered in the processing of the stream. "+
|
36
|
+
"Note that TweetStream will automatically try to reconnect, "+
|
37
|
+
"this is for reference only. Don't panic!"
|
38
|
+
puts red msg
|
39
|
+
@log.error msg
|
40
|
+
end
|
41
|
+
client.on_unauthorized do
|
42
|
+
msg = "\nUnauthorized.\n"+
|
43
|
+
"An HTTP status 401 is encountered while connecting to Twitter. "+
|
44
|
+
"This could happen when system clock drift has occured."
|
45
|
+
puts red msg
|
46
|
+
@log.error msg
|
47
|
+
end
|
48
|
+
client.on_inited do
|
49
|
+
msg = "Connection Established"
|
50
|
+
puts green msg
|
51
|
+
@log.info msg
|
52
|
+
end
|
53
|
+
client.on_no_data_received do
|
54
|
+
msg = "\nNo data was received from the server and a stall occurred. "+
|
55
|
+
"Twitter defines this to be 90 seconds."
|
56
|
+
puts red msg
|
57
|
+
@log.info msg
|
58
|
+
end
|
59
|
+
client.on_delete do |status_id, user_id|
|
60
|
+
msg = "\nStatus deletion notice: user: #{user_id} status: #{status_id}"
|
61
|
+
puts yellow msg
|
62
|
+
@log.info msg
|
63
|
+
end
|
64
|
+
client.on_reconnect do |timeout, retries|
|
65
|
+
msg = "Reconnect Timeout: #{timeout} retries: #{retries}"
|
66
|
+
puts red msg
|
67
|
+
@log.info msg
|
68
|
+
end
|
69
|
+
client.on_limit do |skip_count|
|
70
|
+
msg = "\nA rate limit notice is received from the Twitter stream.\n"+
|
71
|
+
"Discarded Count: #{skip_count}"
|
72
|
+
puts yellow msg
|
73
|
+
@log.info msg
|
74
|
+
end
|
75
|
+
client.on_enhance_your_calm do
|
76
|
+
msg = "\nEnhance your calm"
|
77
|
+
puts yellow msg
|
78
|
+
@log.info msg
|
79
|
+
end
|
80
|
+
|
81
|
+
## Make a call to the statuses/filter method of the Streaming API,
|
82
|
+
## you may provide :follow, :track or :locations.
|
83
|
+
##
|
84
|
+
## :track Phrases of keywords to track
|
85
|
+
## :follow A list of user IDs, indicating the
|
86
|
+
## users to return statuses for in the stream.
|
87
|
+
## :locations Specifies a set of bounding boxes to track
|
88
|
+
## e.g. :locations => [-122.75,36.8,-121.75,37.8,-74,40,-73,41]
|
89
|
+
##
|
90
|
+
client.filter(:track => %w( μια το να και το του της τα τι
|
91
|
+
για δεν στο στον στην με από απο
|
92
|
+
θα τις οι μου) ) do |status|
|
93
|
+
#client.filter(:track => %w( hi ) ) do |status|
|
94
|
+
## status is an object of class Tweet
|
95
|
+
|
96
|
+
## Print the user's screen name
|
97
|
+
# puts blue status.user.screen_name
|
98
|
+
|
99
|
+
## Print the tweet's text
|
100
|
+
# puts green status.text
|
101
|
+
|
102
|
+
## Print a blue dot to indicate a tweet has been received
|
103
|
+
print blue "."
|
104
|
+
|
105
|
+
## Push the status to the object queue
|
106
|
+
## the object queue will marshal the object so the process miner can pop it
|
107
|
+
queue.push(status)
|
108
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/metrics.rb'
|
6
|
+
|
7
|
+
describe "metrics Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "metrics does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "metrics does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/process.rb'
|
6
|
+
|
7
|
+
describe "process Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "process does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "process does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/publicstream.rb'
|
6
|
+
|
7
|
+
describe "publicstream Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "publicstream does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "publicstream does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
### Generators
|
2
|
+
|
3
|
+
Generators are used to create directories, files with boilerplate code, etc.
|
4
|
+
|
5
|
+
|
6
|
+
#### Generators
|
7
|
+
|
8
|
+
* **Application**
|
9
|
+
creates the default file structure and prints appropriate messages
|
10
|
+
|
11
|
+
* **Miner**
|
12
|
+
creates specific miners based on templates
|
13
|
+
|
@@ -14,7 +14,7 @@ module Mines::Generator
|
|
14
14
|
# Accepts as an argument the name of the new application. A new directory with this name will be created
|
15
15
|
class Application < Thor::Group
|
16
16
|
include Thor::Actions
|
17
|
-
include Mines::Logging
|
17
|
+
#include Mines::Logging
|
18
18
|
|
19
19
|
# Define arguments and options
|
20
20
|
argument :name
|
@@ -29,13 +29,14 @@ module Mines::Generator
|
|
29
29
|
# - Files: TODO
|
30
30
|
# also prints appropriate messages
|
31
31
|
def create_directory_structure
|
32
|
-
log.info "Creating directory structure in dir: " + name
|
32
|
+
#@log.info "Creating directory structure in dir: " + name
|
33
33
|
empty_directory name
|
34
34
|
empty_directory name + "/miners"
|
35
35
|
empty_directory name + "/lib"
|
36
36
|
empty_directory name + "/log"
|
37
37
|
empty_directory name + "/config"
|
38
38
|
template "templates/application_config.erb", "#{name}/config/application.rb"
|
39
|
+
template "templates/Gemfile.erb", "#{name}/Gemfile"
|
39
40
|
puts "Application created successfully! ".green
|
40
41
|
print "Type "
|
41
42
|
print "'cd #{name}' ".yellow
|
data/lib/generators/miner.rb
CHANGED
@@ -13,7 +13,7 @@ module Mines::Generator
|
|
13
13
|
# - Metrics
|
14
14
|
class Miner < Thor::Group
|
15
15
|
include Thor::Actions
|
16
|
-
include Mines::Logging
|
16
|
+
#include Mines::Logging
|
17
17
|
|
18
18
|
# Define arguments and options
|
19
19
|
argument :type, :desc => "The type of the miner, [Network,Process,Metrics]", :required => true
|
@@ -32,7 +32,15 @@ module Mines::Generator
|
|
32
32
|
#puts options[:evented]
|
33
33
|
template "templates/#{type}_miner.erb", "miners/#{name}.rb"
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
|
+
# Use the miner spec template file
|
37
|
+
# and put the generated file in spec directory
|
38
|
+
def create_miner_spec
|
39
|
+
puts "Copy miner spec template"
|
40
|
+
#puts options[:evented]
|
41
|
+
template "templates/miner_spec.erb", "spec/#{name}_spec.rb"
|
42
|
+
end
|
43
|
+
|
36
44
|
end # class
|
37
45
|
|
38
46
|
end # module
|
@@ -0,0 +1,24 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'minitest'
|
4
|
+
|
5
|
+
group :network do
|
6
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
7
|
+
gem 'tweetstream' # Twitter Streaming API library
|
8
|
+
end
|
9
|
+
|
10
|
+
group :process do
|
11
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
12
|
+
gem 'json'
|
13
|
+
gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
|
14
|
+
gem 'twitter'
|
15
|
+
gem 'awesome_print'
|
16
|
+
end
|
17
|
+
|
18
|
+
group :metrics do
|
19
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
20
|
+
gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
|
21
|
+
gem 'json'
|
22
|
+
gem 'eventmachine'
|
23
|
+
gem 'awesome_print'
|
24
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
### Templates
|
2
|
+
|
3
|
+
Files with boilerplate code are created based on these templates
|
4
|
+
|
5
|
+
|
6
|
+
#### Templates
|
7
|
+
|
8
|
+
* **network miner**
|
9
|
+
Boilerplate code for the network miner. Will be placed in miners directory.
|
10
|
+
|
11
|
+
* **process miner**
|
12
|
+
Boilerplate code for the process miner. Will be placed in miners directory.
|
13
|
+
|
14
|
+
* **twitter miner**
|
15
|
+
Boilerplate code for the twitter miner. Will be placed in miners directory.
|
16
|
+
|
17
|
+
* **metrics miner**
|
18
|
+
Boilerplate code for the metrics miner. Will be placed in miners directory.
|
19
|
+
|
20
|
+
* **application config**
|
21
|
+
Application configuration. Will be placed in config directory.
|
22
|
+
|
23
|
+
* **miner spec**
|
24
|
+
Miner spec. Will be created for each miner and placed in spec directory.
|
@@ -6,6 +6,8 @@ Application.config do
|
|
6
6
|
parameter :var1
|
7
7
|
parameter :var2
|
8
8
|
parameter :var3
|
9
|
+
parameter :logname
|
10
|
+
parameter :app_prefix
|
9
11
|
end
|
10
12
|
|
11
13
|
## Change their values
|
@@ -13,7 +15,16 @@ Application.config do
|
|
13
15
|
var1 "value"
|
14
16
|
var2 ['one','two','three']
|
15
17
|
var3 { :one=>1, :two=>2 }
|
18
|
+
logname "<%= name %>.log"
|
19
|
+
app_prefix "<%= name %>"
|
16
20
|
end
|
17
21
|
|
22
|
+
|
23
|
+
## Initializer
|
24
|
+
## e.g. initialize a global variable
|
25
|
+
## $app_prefix variable needs to be global
|
26
|
+
$app_prefix = Application.app_prefix
|
27
|
+
|
28
|
+
|
18
29
|
## Usage example
|
19
30
|
# puts Application.var1
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :network
|
7
|
+
|
8
|
+
log_name :<%= name %>
|
9
|
+
|
10
|
+
## Connect to a TimeSeries metrics store
|
11
|
+
## Data is being pushed constantly to the timeseries by
|
12
|
+
## the process miner
|
13
|
+
@ts = TimeSeries.new("<%= name %>")
|
14
|
+
|
15
|
+
|
16
|
+
## Start Event Machine
|
17
|
+
EM.run do
|
18
|
+
|
19
|
+
## Periodic execution
|
20
|
+
EM::PeriodicTimer.new(13){
|
21
|
+
print blue '.'
|
22
|
+
}
|
23
|
+
|
24
|
+
## Periodic execution
|
25
|
+
EM::PeriodicTimer.new(1){
|
26
|
+
print red '.'
|
27
|
+
}
|
28
|
+
end # end EM
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/<%= name %>.rb'
|
6
|
+
|
7
|
+
describe "<%= name %> Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "<%= name %> does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "<%= name %> does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :network
|
7
|
+
|
8
|
+
#log_name :<%= name %>
|
9
|
+
|
10
|
+
## Use EventMachine for non-blocking IO
|
11
|
+
## or Threads for blocking IO
|
12
|
+
|
13
|
+
## Create Thread example
|
14
|
+
Thread.new { loop { print green '.'; `sleep 1` } }
|
15
|
+
|
16
|
+
# Start Event Machine
|
17
|
+
EM.run do
|
18
|
+
|
19
|
+
## Periodic execution
|
20
|
+
EM::PeriodicTimer.new(1){
|
21
|
+
print blue '.'
|
22
|
+
## Avoid blocking commands inside the Reactor.
|
23
|
+
## This 'sleep' is blocking the other
|
24
|
+
## timer too. Use Threads instead.
|
25
|
+
`sleep 1`
|
26
|
+
}
|
27
|
+
|
28
|
+
## Periodic execution
|
29
|
+
EM::PeriodicTimer.new(0.5){
|
30
|
+
print red '.'
|
31
|
+
}
|
32
|
+
end # end EM
|