mines 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.yardopts +6 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +37 -0
- data/README.md +25 -0
- data/Rakefile +8 -0
- data/bin/mines +8 -8
- data/config/application.rb +11 -0
- data/config/help.rb +13 -0
- data/examples/README.md +4 -0
- data/examples/hashtags/Gemfile +24 -0
- data/examples/hashtags/Gemfile.lock +81 -0
- data/examples/hashtags/README.md +22 -0
- data/examples/hashtags/config/application.rb +30 -0
- data/examples/hashtags/miners/metrics.rb +157 -0
- data/examples/hashtags/miners/process.rb +46 -0
- data/examples/hashtags/miners/publicstream.rb +108 -0
- data/examples/hashtags/spec/metrics_spec.rb +23 -0
- data/examples/hashtags/spec/process_spec.rb +23 -0
- data/examples/hashtags/spec/publicstream_spec.rb +23 -0
- data/lib/generators/README.md +13 -0
- data/lib/generators/application.rb +3 -2
- data/lib/generators/miner.rb +10 -2
- data/lib/generators/templates/Gemfile.erb +24 -0
- data/lib/generators/templates/README.md +24 -0
- data/lib/generators/templates/application_config.erb +11 -0
- data/lib/generators/templates/metrics_miner.erb +28 -0
- data/lib/generators/templates/miner_spec.erb +23 -0
- data/lib/generators/templates/network_miner.erb +32 -0
- data/lib/generators/templates/process_miner.erb +26 -1
- data/lib/generators/templates/twitter_miner.erb +111 -0
- data/lib/logging.rb +10 -3
- data/mines.gemspec +33 -0
- data/spec/logging_spec.rb +19 -0
- data/spec/mines_spec.rb +19 -0
- data/spec/redis_store_spec.rb +64 -0
- data/spec/utilities_spec.rb +26 -0
- metadata +45 -11
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :process
|
7
|
+
|
8
|
+
@log.info "Process miner pid: #{Process.pid}"
|
9
|
+
|
10
|
+
## Connect to an Object Queue
|
11
|
+
## elements are pushed to this queue by the network miner
|
12
|
+
queue = ObjectQueue.new "publicstream"
|
13
|
+
|
14
|
+
counter = Counter.new "original"
|
15
|
+
|
16
|
+
channel = MessageChannel.new("web") # example channel
|
17
|
+
|
18
|
+
## Create a timeseries metrics store to
|
19
|
+
## save the occurrence rate for each tag for each minute
|
20
|
+
ts = TimeSeries.new("hashtags")
|
21
|
+
|
22
|
+
## Create a timeseries metrics store to
|
23
|
+
## save the total number of tweets each minute
|
24
|
+
tweets_rate = TimeSeries.new("tweets_rate")
|
25
|
+
|
26
|
+
## Start the process loop
|
27
|
+
loop {
|
28
|
+
## Get an element from the queue
|
29
|
+
status = queue.pop
|
30
|
+
tweets_rate.push "all"
|
31
|
+
|
32
|
+
## Filter out Retweets and Replies
|
33
|
+
unless status.text =~ /^RT/ or
|
34
|
+
status.retweet_count > 0 or
|
35
|
+
status.text =~ /^@/
|
36
|
+
|
37
|
+
print_tweet(status.user.screen_name, status.text)
|
38
|
+
|
39
|
+
status.hashtags.each do |hashtag|
|
40
|
+
ts.push (hashtag.text)
|
41
|
+
end
|
42
|
+
counter.incr
|
43
|
+
channel.publish({:user => status.user.screen_name, :text => status.text}.to_json)
|
44
|
+
#web.publish({:user => status.user.screen_name, :text => status.text.gsub(/(https?:\/\/\S+)/, '<a href="\1" target="_blank">\1</a> ').to_s}.to_json)
|
45
|
+
end
|
46
|
+
} # end loop
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :network
|
7
|
+
|
8
|
+
## Create an Object Queue
|
9
|
+
queue = ObjectQueue.new "publicstream"
|
10
|
+
|
11
|
+
## Provide your Twitter Streaming API keys and credentials
|
12
|
+
##
|
13
|
+
## example:
|
14
|
+
# TweetStream.configure do |config|
|
15
|
+
# config.consumer_key = "mZ0gsfwgA"
|
16
|
+
# config.consumer_secret = "nqwQqELrvdb6C"
|
17
|
+
# config.oauth_token = "OfIUtfTmP6NQuwWKlH"
|
18
|
+
# config.oauth_token_secret = 'u4bFMpQxfgwZl'
|
19
|
+
# config.auth_method = :oauth
|
20
|
+
# end
|
21
|
+
|
22
|
+
## It is recommended to provide your credentials
|
23
|
+
## in a different file outside version control system
|
24
|
+
## to avoid publishing them.
|
25
|
+
## example:
|
26
|
+
require_relative '../../twitter_credentials'
|
27
|
+
|
28
|
+
## Initialize TweetStream Client
|
29
|
+
client = TweetStream::Client.new
|
30
|
+
|
31
|
+
## Print custom messages when client receives special event messages
|
32
|
+
## See also: https://dev.twitter.com/docs/streaming-apis/messages
|
33
|
+
client.on_error do |message|
|
34
|
+
msg = "\nError message: #{message}\n"+
|
35
|
+
"An HTTP error is encountered in the processing of the stream. "+
|
36
|
+
"Note that TweetStream will automatically try to reconnect, "+
|
37
|
+
"this is for reference only. Don't panic!"
|
38
|
+
puts red msg
|
39
|
+
@log.error msg
|
40
|
+
end
|
41
|
+
client.on_unauthorized do
|
42
|
+
msg = "\nUnauthorized.\n"+
|
43
|
+
"An HTTP status 401 is encountered while connecting to Twitter. "+
|
44
|
+
"This could happen when system clock drift has occured."
|
45
|
+
puts red msg
|
46
|
+
@log.error msg
|
47
|
+
end
|
48
|
+
client.on_inited do
|
49
|
+
msg = "Connection Established"
|
50
|
+
puts green msg
|
51
|
+
@log.info msg
|
52
|
+
end
|
53
|
+
client.on_no_data_received do
|
54
|
+
msg = "\nNo data was received from the server and a stall occurred. "+
|
55
|
+
"Twitter defines this to be 90 seconds."
|
56
|
+
puts red msg
|
57
|
+
@log.info msg
|
58
|
+
end
|
59
|
+
client.on_delete do |status_id, user_id|
|
60
|
+
msg = "\nStatus deletion notice: user: #{user_id} status: #{status_id}"
|
61
|
+
puts yellow msg
|
62
|
+
@log.info msg
|
63
|
+
end
|
64
|
+
client.on_reconnect do |timeout, retries|
|
65
|
+
msg = "Reconnect Timeout: #{timeout} retries: #{retries}"
|
66
|
+
puts red msg
|
67
|
+
@log.info msg
|
68
|
+
end
|
69
|
+
client.on_limit do |skip_count|
|
70
|
+
msg = "\nA rate limit notice is received from the Twitter stream.\n"+
|
71
|
+
"Discarded Count: #{skip_count}"
|
72
|
+
puts yellow msg
|
73
|
+
@log.info msg
|
74
|
+
end
|
75
|
+
client.on_enhance_your_calm do
|
76
|
+
msg = "\nEnhance your calm"
|
77
|
+
puts yellow msg
|
78
|
+
@log.info msg
|
79
|
+
end
|
80
|
+
|
81
|
+
## Make a call to the statuses/filter method of the Streaming API,
|
82
|
+
## you may provide :follow, :track or :locations.
|
83
|
+
##
|
84
|
+
## :track Phrases of keywords to track
|
85
|
+
## :follow A list of user IDs, indicating the
|
86
|
+
## users to return statuses for in the stream.
|
87
|
+
## :locations Specifies a set of bounding boxes to track
|
88
|
+
## e.g. :locations => [-122.75,36.8,-121.75,37.8,-74,40,-73,41]
|
89
|
+
##
|
90
|
+
client.filter(:track => %w( μια το να και το του της τα τι
|
91
|
+
για δεν στο στον στην με από απο
|
92
|
+
θα τις οι μου) ) do |status|
|
93
|
+
#client.filter(:track => %w( hi ) ) do |status|
|
94
|
+
## status is an object of class Tweet
|
95
|
+
|
96
|
+
## Print the user's screen name
|
97
|
+
# puts blue status.user.screen_name
|
98
|
+
|
99
|
+
## Print the tweet's text
|
100
|
+
# puts green status.text
|
101
|
+
|
102
|
+
## Print a blue dot to indicate a tweet has been received
|
103
|
+
print blue "."
|
104
|
+
|
105
|
+
## Push the status to the object queue
|
106
|
+
## the object queue will marshal the object
|
107
|
+
queue.push(status)
|
108
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/metrics.rb'
|
6
|
+
|
7
|
+
describe "metrics Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "metrics does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "metrics does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/process.rb'
|
6
|
+
|
7
|
+
describe "process Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "process does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "process does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/publicstream.rb'
|
6
|
+
|
7
|
+
describe "publicstream Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "publicstream does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "publicstream does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
### Generators
|
2
|
+
|
3
|
+
Generators are used to create directories, files with boilerplate code, etc.
|
4
|
+
|
5
|
+
|
6
|
+
#### Generators
|
7
|
+
|
8
|
+
* **Application**
|
9
|
+
creates the default file structure and prints appropriate messages
|
10
|
+
|
11
|
+
* **Miner**
|
12
|
+
creates specific miners based on templates
|
13
|
+
|
@@ -14,7 +14,7 @@ module Mines::Generator
|
|
14
14
|
# Accepts as an argument the name of the new application. A new directory with this name will be created
|
15
15
|
class Application < Thor::Group
|
16
16
|
include Thor::Actions
|
17
|
-
include Mines::Logging
|
17
|
+
#include Mines::Logging
|
18
18
|
|
19
19
|
# Define arguments and options
|
20
20
|
argument :name
|
@@ -29,13 +29,14 @@ module Mines::Generator
|
|
29
29
|
# - Files: TODO
|
30
30
|
# also prints appropriate messages
|
31
31
|
def create_directory_structure
|
32
|
-
log.info "Creating directory structure in dir: " + name
|
32
|
+
#@log.info "Creating directory structure in dir: " + name
|
33
33
|
empty_directory name
|
34
34
|
empty_directory name + "/miners"
|
35
35
|
empty_directory name + "/lib"
|
36
36
|
empty_directory name + "/log"
|
37
37
|
empty_directory name + "/config"
|
38
38
|
template "templates/application_config.erb", "#{name}/config/application.rb"
|
39
|
+
template "templates/Gemfile.erb", "#{name}/Gemfile"
|
39
40
|
puts "Application created successfully! ".green
|
40
41
|
print "Type "
|
41
42
|
print "'cd #{name}' ".yellow
|
data/lib/generators/miner.rb
CHANGED
@@ -13,7 +13,7 @@ module Mines::Generator
|
|
13
13
|
# - Metrics
|
14
14
|
class Miner < Thor::Group
|
15
15
|
include Thor::Actions
|
16
|
-
include Mines::Logging
|
16
|
+
#include Mines::Logging
|
17
17
|
|
18
18
|
# Define arguments and options
|
19
19
|
argument :type, :desc => "The type of the miner, [Network,Process,Metrics]", :required => true
|
@@ -32,7 +32,15 @@ module Mines::Generator
|
|
32
32
|
#puts options[:evented]
|
33
33
|
template "templates/#{type}_miner.erb", "miners/#{name}.rb"
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
|
+
# Use the miner spec template
|
37
|
+
# and put the generated file in spec directory
|
38
|
+
def create_miner_spec
|
39
|
+
puts "Copy miner spec template"
|
40
|
+
#puts options[:evented]
|
41
|
+
template "templates/miner_spec.erb", "spec/#{name}_spec.rb"
|
42
|
+
end
|
43
|
+
|
36
44
|
end # class
|
37
45
|
|
38
46
|
end # module
|
@@ -0,0 +1,24 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'minitest'
|
4
|
+
|
5
|
+
group :network do
|
6
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
7
|
+
gem 'tweetstream' # Twitter Streaming API library
|
8
|
+
end
|
9
|
+
|
10
|
+
group :process do
|
11
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
12
|
+
gem 'json'
|
13
|
+
gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
|
14
|
+
gem 'twitter'
|
15
|
+
gem 'awesome_print'
|
16
|
+
end
|
17
|
+
|
18
|
+
group :metrics do
|
19
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
20
|
+
gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
|
21
|
+
gem 'json'
|
22
|
+
gem 'eventmachine'
|
23
|
+
gem 'awesome_print'
|
24
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
### Templates
|
2
|
+
|
3
|
+
Files with boilerplate code are created based on these templates
|
4
|
+
|
5
|
+
|
6
|
+
#### Templates
|
7
|
+
|
8
|
+
* **network miner**
|
9
|
+
Boilerplate code for the network miner. Will be placed in miners directory.
|
10
|
+
|
11
|
+
* **process miner**
|
12
|
+
Boilerplate code for the process miner. Will be placed in miners directory.
|
13
|
+
|
14
|
+
* **twitter miner**
|
15
|
+
Boilerplate code for the twitter miner. Will be placed in miners directory.
|
16
|
+
|
17
|
+
* **metrics miner**
|
18
|
+
Boilerplate code for the metrics miner. Will be placed in miners directory.
|
19
|
+
|
20
|
+
* **application config**
|
21
|
+
Application configuration. Will be placed in config directory.
|
22
|
+
|
23
|
+
* **miner spec**
|
24
|
+
Miner spec. Will be created for each miner and placed in spec directory.
|
@@ -6,6 +6,8 @@ Application.config do
|
|
6
6
|
parameter :var1
|
7
7
|
parameter :var2
|
8
8
|
parameter :var3
|
9
|
+
parameter :logname
|
10
|
+
parameter :app_prefix
|
9
11
|
end
|
10
12
|
|
11
13
|
## Change their values
|
@@ -13,7 +15,16 @@ Application.config do
|
|
13
15
|
var1 "value"
|
14
16
|
var2 ['one','two','three']
|
15
17
|
var3 { :one=>1, :two=>2 }
|
18
|
+
logname "<%= name %>.log"
|
19
|
+
app_prefix "<%= name %>"
|
16
20
|
end
|
17
21
|
|
22
|
+
|
23
|
+
## Initializer
|
24
|
+
## e.g. initialize a global variable
|
25
|
+
## $app_prefix variable needs to be global
|
26
|
+
$app_prefix = Application.app_prefix
|
27
|
+
|
28
|
+
|
18
29
|
## Usage example
|
19
30
|
# puts Application.var1
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :metrics
|
7
|
+
|
8
|
+
log_name :<%= name %>
|
9
|
+
|
10
|
+
## Connect to a TimeSeries metrics store
|
11
|
+
## Data is being pushed constantly to the timeseries by
|
12
|
+
## the process miner
|
13
|
+
@ts = TimeSeries.new("<%= name %>")
|
14
|
+
|
15
|
+
|
16
|
+
## Start Event Machine
|
17
|
+
EM.run do
|
18
|
+
|
19
|
+
## Periodic execution
|
20
|
+
EM::PeriodicTimer.new(13){
|
21
|
+
print blue '.'
|
22
|
+
}
|
23
|
+
|
24
|
+
## Periodic execution
|
25
|
+
EM::PeriodicTimer.new(1){
|
26
|
+
print red '.'
|
27
|
+
}
|
28
|
+
end # end EM
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
require "minitest/autorun"
|
5
|
+
require_relative '../miners/<%= name %>.rb'
|
6
|
+
|
7
|
+
describe "<%= name %> Miner" do
|
8
|
+
|
9
|
+
before do
|
10
|
+
end
|
11
|
+
|
12
|
+
after do
|
13
|
+
end
|
14
|
+
|
15
|
+
it "<%= name %> does this" do
|
16
|
+
true.must_equal true
|
17
|
+
end
|
18
|
+
|
19
|
+
it "<%= name %> does that" do
|
20
|
+
true.must_equal true
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :network
|
7
|
+
|
8
|
+
#log_name :<%= name %>
|
9
|
+
|
10
|
+
## Use EventMachine for non-blocking IO
|
11
|
+
## or Threads for blocking IO
|
12
|
+
|
13
|
+
## Create Thread example
|
14
|
+
Thread.new { loop { print green '.'; `sleep 1` } }
|
15
|
+
|
16
|
+
# Start Event Machine
|
17
|
+
EM.run do
|
18
|
+
|
19
|
+
## Periodic execution
|
20
|
+
EM::PeriodicTimer.new(1){
|
21
|
+
print blue '.'
|
22
|
+
## Avoid blocking commands inside the Reactor.
|
23
|
+
## This 'sleep' is blocking the other
|
24
|
+
## timer too. Use Threads instead.
|
25
|
+
`sleep 1`
|
26
|
+
}
|
27
|
+
|
28
|
+
## Periodic execution
|
29
|
+
EM::PeriodicTimer.new(0.5){
|
30
|
+
print red '.'
|
31
|
+
}
|
32
|
+
end # end EM
|