mines 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.yardopts +6 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +37 -0
- data/README.md +25 -0
- data/Rakefile +8 -0
- data/bin/mines +8 -8
- data/config/application.rb +11 -0
- data/config/help.rb +13 -0
- data/examples/README.md +4 -0
- data/examples/hashtags/Gemfile +24 -0
- data/examples/hashtags/Gemfile.lock +81 -0
- data/examples/hashtags/README.md +22 -0
- data/examples/hashtags/config/application.rb +30 -0
- data/examples/hashtags/miners/metrics.rb +157 -0
- data/examples/hashtags/miners/process.rb +46 -0
- data/examples/hashtags/miners/publicstream.rb +108 -0
- data/examples/hashtags/spec/metrics_spec.rb +23 -0
- data/examples/hashtags/spec/process_spec.rb +23 -0
- data/examples/hashtags/spec/publicstream_spec.rb +23 -0
- data/lib/generators/README.md +13 -0
- data/lib/generators/application.rb +3 -2
- data/lib/generators/miner.rb +10 -2
- data/lib/generators/templates/Gemfile.erb +24 -0
- data/lib/generators/templates/README.md +24 -0
- data/lib/generators/templates/application_config.erb +11 -0
- data/lib/generators/templates/metrics_miner.erb +28 -0
- data/lib/generators/templates/miner_spec.erb +23 -0
- data/lib/generators/templates/network_miner.erb +32 -0
- data/lib/generators/templates/process_miner.erb +26 -1
- data/lib/generators/templates/twitter_miner.erb +111 -0
- data/lib/logging.rb +10 -3
- data/mines.gemspec +33 -0
- data/spec/logging_spec.rb +19 -0
- data/spec/mines_spec.rb +19 -0
- data/spec/redis_store_spec.rb +64 -0
- data/spec/utilities_spec.rb +26 -0
- metadata +45 -11
data/.gitignore
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
mines (0.0.1)
|
5
|
+
colored
|
6
|
+
hiredis
|
7
|
+
redis
|
8
|
+
thor
|
9
|
+
twitter
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
colored (1.2)
|
15
|
+
faraday (0.8.8)
|
16
|
+
multipart-post (~> 1.2.0)
|
17
|
+
hiredis (0.4.5)
|
18
|
+
minitest (5.0.6)
|
19
|
+
multi_json (1.7.9)
|
20
|
+
multipart-post (1.2.0)
|
21
|
+
rake (10.0.4)
|
22
|
+
redis (3.0.4)
|
23
|
+
simple_oauth (0.2.0)
|
24
|
+
thor (0.18.1)
|
25
|
+
twitter (4.8.1)
|
26
|
+
faraday (~> 0.8, < 0.10)
|
27
|
+
multi_json (~> 1.0)
|
28
|
+
simple_oauth (~> 0.2)
|
29
|
+
|
30
|
+
PLATFORMS
|
31
|
+
ruby
|
32
|
+
|
33
|
+
DEPENDENCIES
|
34
|
+
bundler (~> 1.3)
|
35
|
+
mines!
|
36
|
+
minitest
|
37
|
+
rake
|
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Ruby in Mines
|
2
|
+
|
3
|
+
Ruby in Mines is a ruby framework for creating data-mining application prototypes that focus on processing near real-time human generated content.
|
4
|
+
|
5
|
+
Install the gem and run with:
|
6
|
+
<tt>gem install mines </tt>
|
7
|
+
|
8
|
+
**Note:**
|
9
|
+
This project is under development. It has limited capabilities yet.
|
10
|
+
|
11
|
+
## Commands
|
12
|
+
|
13
|
+
* <tt>mines help </tt>
|
14
|
+
print help
|
15
|
+
|
16
|
+
* <tt>mines new AppName </tt>
|
17
|
+
create a new app in the current directory
|
18
|
+
|
19
|
+
* <tt>mines generate MinerName </tt>
|
20
|
+
generate a new miner
|
21
|
+
|
22
|
+
* <tt>mines start </tt>
|
23
|
+
start the miners, and restart them if source code changes
|
24
|
+
|
25
|
+
|
data/Rakefile
ADDED
data/bin/mines
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
require 'thor'
|
5
5
|
require_relative '../lib/utilities'
|
6
|
-
require_relative '../lib/logging'
|
6
|
+
#require_relative '../lib/logging'
|
7
7
|
require_relative '../lib/mines'
|
8
8
|
|
9
9
|
# require all generators in lib
|
@@ -16,7 +16,7 @@ module Mines
|
|
16
16
|
# invocates generators according to parameters
|
17
17
|
class Cli < Thor
|
18
18
|
|
19
|
-
include Logging
|
19
|
+
#include Logging
|
20
20
|
|
21
21
|
def initialize *args
|
22
22
|
super
|
@@ -28,17 +28,17 @@ module Mines
|
|
28
28
|
# @param name The name of the new application
|
29
29
|
def new(name)
|
30
30
|
ARGV.shift
|
31
|
-
log.debug "command 'new' with arguments: #{ARGV.join(', ')}"
|
31
|
+
#@log.debug "command 'new' with arguments: #{ARGV.join(', ')}"
|
32
32
|
Generator::Application.start ARGV
|
33
33
|
end
|
34
34
|
|
35
35
|
# help
|
36
36
|
def help *args
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
if args.empty?
|
38
|
+
#@log.debug "command 'help' with no arguments"
|
39
|
+
else
|
40
|
+
#@log.debug "command 'help' with arguments: #{args.join(', ').to_s}"
|
41
|
+
end
|
42
42
|
puts "Summary:"
|
43
43
|
super
|
44
44
|
puts "Now print my help :)\nYeah!"
|
data/config/help.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
Mines::Help.configure do |config|
|
4
|
+
config.new = { sum: "new APP_NAME" ,
|
5
|
+
desc: "Create Application APP_NAME" ,
|
6
|
+
help: ""
|
7
|
+
}
|
8
|
+
config.generate = { sum: "generate [twitter|network|process|metrics] NAME" ,
|
9
|
+
desc: "Create a twitter or network or process or metrics miner" ,
|
10
|
+
help: ""
|
11
|
+
}
|
12
|
+
end
|
13
|
+
|
data/examples/README.md
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'minitest'
|
4
|
+
|
5
|
+
group :network do
|
6
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
7
|
+
gem 'tweetstream' # Twitter Streaming API library
|
8
|
+
end
|
9
|
+
|
10
|
+
group :process do
|
11
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
12
|
+
gem 'json'
|
13
|
+
gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
|
14
|
+
gem 'twitter'
|
15
|
+
gem 'awesome_print'
|
16
|
+
end
|
17
|
+
|
18
|
+
group :metrics do
|
19
|
+
gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
|
20
|
+
gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
|
21
|
+
gem 'json'
|
22
|
+
gem 'eventmachine'
|
23
|
+
gem 'awesome_print'
|
24
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
GIT
|
2
|
+
remote: git://github.com/panayiotis/mines.git
|
3
|
+
revision: c7fb4978b77d8aae68019be020e118557ecb86ea
|
4
|
+
specs:
|
5
|
+
mines (0.0.1)
|
6
|
+
colored
|
7
|
+
hiredis
|
8
|
+
redis
|
9
|
+
thor
|
10
|
+
twitter
|
11
|
+
|
12
|
+
GIT
|
13
|
+
remote: git://github.com/panayiotis/time_series.git
|
14
|
+
revision: 7c0896db0c88152bbe5394e149fcc1c746eb6b83
|
15
|
+
specs:
|
16
|
+
time_series (0.0.2)
|
17
|
+
activesupport
|
18
|
+
awesome_print
|
19
|
+
hiredis
|
20
|
+
redis
|
21
|
+
|
22
|
+
GEM
|
23
|
+
remote: https://rubygems.org/
|
24
|
+
specs:
|
25
|
+
activesupport (3.2.14)
|
26
|
+
i18n (~> 0.6, >= 0.6.4)
|
27
|
+
multi_json (~> 1.0)
|
28
|
+
addressable (2.3.5)
|
29
|
+
awesome_print (1.1.0)
|
30
|
+
colored (1.2)
|
31
|
+
cookiejar (0.3.0)
|
32
|
+
daemons (1.1.9)
|
33
|
+
em-http-request (1.0.3)
|
34
|
+
addressable (>= 2.2.3)
|
35
|
+
cookiejar
|
36
|
+
em-socksify
|
37
|
+
eventmachine (>= 1.0.0.beta.4)
|
38
|
+
http_parser.rb (>= 0.5.3)
|
39
|
+
em-socksify (0.3.0)
|
40
|
+
eventmachine (>= 1.0.0.beta.4)
|
41
|
+
em-twitter (0.2.2)
|
42
|
+
eventmachine (~> 1.0)
|
43
|
+
http_parser.rb (~> 0.5)
|
44
|
+
simple_oauth (~> 0.1)
|
45
|
+
eventmachine (1.0.3)
|
46
|
+
faraday (0.8.8)
|
47
|
+
multipart-post (~> 1.2.0)
|
48
|
+
hiredis (0.4.5)
|
49
|
+
http_parser.rb (0.5.3)
|
50
|
+
i18n (0.6.5)
|
51
|
+
json (1.8.0)
|
52
|
+
minitest (5.0.7)
|
53
|
+
multi_json (1.8.0)
|
54
|
+
multipart-post (1.2.0)
|
55
|
+
redis (3.0.4)
|
56
|
+
simple_oauth (0.2.0)
|
57
|
+
thor (0.18.1)
|
58
|
+
tweetstream (2.5.0)
|
59
|
+
daemons (~> 1.1)
|
60
|
+
em-http-request (~> 1.0.2)
|
61
|
+
em-twitter (~> 0.2)
|
62
|
+
twitter (~> 4.5)
|
63
|
+
yajl-ruby (~> 1.1)
|
64
|
+
twitter (4.8.1)
|
65
|
+
faraday (~> 0.8, < 0.10)
|
66
|
+
multi_json (~> 1.0)
|
67
|
+
simple_oauth (~> 0.2)
|
68
|
+
yajl-ruby (1.1.0)
|
69
|
+
|
70
|
+
PLATFORMS
|
71
|
+
ruby
|
72
|
+
|
73
|
+
DEPENDENCIES
|
74
|
+
awesome_print
|
75
|
+
eventmachine
|
76
|
+
json
|
77
|
+
mines!
|
78
|
+
minitest
|
79
|
+
time_series!
|
80
|
+
tweetstream
|
81
|
+
twitter
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Example Hashtags app
|
2
|
+
|
3
|
+
To create the basic directory structure and miners use the following commands
|
4
|
+
|
5
|
+
Create the app:
|
6
|
+
<tt>mines new hashtags</tt>
|
7
|
+
|
8
|
+
Create the twitter miner:
|
9
|
+
<tt>mines generate twitter publicstream</tt>
|
10
|
+
|
11
|
+
Create the process miner:
|
12
|
+
<tt>mines generate process process</tt>
|
13
|
+
|
14
|
+
Create the metrics miner:
|
15
|
+
<tt>mines generate metrics metrics</tt>
|
16
|
+
|
17
|
+
Resolve project's dependencies with bundler:
|
18
|
+
<tt>bundle install</tt>
|
19
|
+
|
20
|
+
Each miner is independent from the others,
|
21
|
+
they use Message Queues to communicate and a [TimeSeries metrics store](https://github.com/panayiotis/time_series)
|
22
|
+
to share data.
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
## Configuration file for your app
|
3
|
+
|
4
|
+
## Declare the configuration parameters you want to use
|
5
|
+
Application.config do
|
6
|
+
parameter :var1
|
7
|
+
parameter :var2
|
8
|
+
parameter :var3
|
9
|
+
parameter :logname
|
10
|
+
parameter :app_prefix
|
11
|
+
end
|
12
|
+
|
13
|
+
## Change their values
|
14
|
+
Application.config do
|
15
|
+
var1 "value"
|
16
|
+
var2 ['one','two','three']
|
17
|
+
var3 { :one=>1, :two=>2 }
|
18
|
+
logname "hashtags.log"
|
19
|
+
app_prefix "app"
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
## Initializer
|
24
|
+
## e.g. initialize a global variable
|
25
|
+
## $app_prefix variable needs to be global
|
26
|
+
$app_prefix = Application.app_prefix
|
27
|
+
|
28
|
+
|
29
|
+
## Usage example
|
30
|
+
# puts Application.var1
|
@@ -0,0 +1,157 @@
|
|
1
|
+
#!/bin/ruby
|
2
|
+
# coding: utf-8
|
3
|
+
|
4
|
+
## Gem dependencies are managed by Bundler inside project's Gemfile
|
5
|
+
require 'bundler'
|
6
|
+
Bundler.require :metrics
|
7
|
+
#require 'active_support/core_ext/date/calculations.rb'
|
8
|
+
|
9
|
+
## Initialize TimeSeries
|
10
|
+
## Data is been pushed constantly to the timeseries by
|
11
|
+
## the process miner
|
12
|
+
@ts = TimeSeries.new("hashtags")
|
13
|
+
@tweets_rate = TimeSeries.new("tweets_rate")
|
14
|
+
|
15
|
+
|
16
|
+
## Print strings in columns in the terminal
|
17
|
+
## the terminal needs to have the 'tput' command
|
18
|
+
def columnize *args
|
19
|
+
columns = args.size
|
20
|
+
width = Integer(`tput cols`) / columns
|
21
|
+
args.each{|a| print a.to_s.slice(0,width).ljust(width)}
|
22
|
+
puts ""
|
23
|
+
end
|
24
|
+
|
25
|
+
## Print strings in columns in the terminal
|
26
|
+
## the terminal needs to have the 'tput' command
|
27
|
+
def columnize_arrays *args
|
28
|
+
columns = args.size
|
29
|
+
width = Integer(`tput cols`) / columns
|
30
|
+
lines = 0
|
31
|
+
args.each { |a| lines = a.size - 1 if (a.size - 1 > lines)}
|
32
|
+
line = 0
|
33
|
+
lines.times do
|
34
|
+
args.each do |a|
|
35
|
+
unless a[line].nil?
|
36
|
+
print a[line][0].to_s.slice(0,width).ljust(width)
|
37
|
+
else
|
38
|
+
print '-'.ljust(width)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
puts ""
|
42
|
+
line+=1
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
## Print info and results to the terminal
|
48
|
+
def refresh_terminal
|
49
|
+
`reset`
|
50
|
+
|
51
|
+
## General info
|
52
|
+
puts Time.now.to_s
|
53
|
+
info = @ts.redis.info
|
54
|
+
print "Redis: "
|
55
|
+
puts info["db0"]
|
56
|
+
print "Redis Memory: "
|
57
|
+
puts info["used_memory_human"]
|
58
|
+
|
59
|
+
## Timeseries info
|
60
|
+
print "TimeSeries Keys: "
|
61
|
+
puts @ts.keys.size
|
62
|
+
print "TimeSeries resolution: "
|
63
|
+
puts @ts.resolution
|
64
|
+
puts ""
|
65
|
+
print "Tweets per minute: "
|
66
|
+
puts @tweets_rate.last["all"]
|
67
|
+
|
68
|
+
|
69
|
+
## Print a table about hashtags popularity during the
|
70
|
+
## last minute
|
71
|
+
width = Integer(`tput cols`)
|
72
|
+
puts "Trending hashtags during the last minute".center(width)
|
73
|
+
puts '-'*width
|
74
|
+
columnize "Hashtag","Last Minute", "Prev Minute","Last Hour", "Prev Hour", "Today", "Yesterday"
|
75
|
+
puts '-'*width
|
76
|
+
last = @ts.last_key.get
|
77
|
+
previous = @ts.previous_key.get
|
78
|
+
last_hour = @ts.hour(Time.now)
|
79
|
+
previous_hour = @ts.hour(Time.now - 1.hour)
|
80
|
+
today = @ts.day(Date.today)
|
81
|
+
yesterday = @ts.day(Date.yesterday)
|
82
|
+
|
83
|
+
if @ts.last_key.persistant?
|
84
|
+
last.each do |hashtag,value|
|
85
|
+
if previous.has_key? hashtag
|
86
|
+
previous_value = '%.2f' % previous[hashtag]
|
87
|
+
else
|
88
|
+
previous_value = "-"
|
89
|
+
end
|
90
|
+
|
91
|
+
if last_hour.has_key? hashtag
|
92
|
+
last_hour_value = '%.2f' % last_hour[hashtag]
|
93
|
+
else
|
94
|
+
last_hour_value = "-"
|
95
|
+
end
|
96
|
+
|
97
|
+
if previous_hour.has_key? hashtag
|
98
|
+
previous_hour_value = '%.2f' % previous_hour[hashtag]
|
99
|
+
else
|
100
|
+
previous_hour_value = "-"
|
101
|
+
end
|
102
|
+
|
103
|
+
if today.has_key? hashtag
|
104
|
+
today_value = '%.2f' % today[hashtag]
|
105
|
+
else
|
106
|
+
today_value = "-"
|
107
|
+
end
|
108
|
+
|
109
|
+
if yesterday.has_key? hashtag
|
110
|
+
yesterday_value = '%.2f' % yesterday[hashtag]
|
111
|
+
else
|
112
|
+
yesterday_value = "-"
|
113
|
+
end
|
114
|
+
columnize( hashtag, '%.2f' % value,
|
115
|
+
previous_value,
|
116
|
+
last_hour_value,
|
117
|
+
previous_hour_value,
|
118
|
+
today_value,
|
119
|
+
yesterday_value
|
120
|
+
)
|
121
|
+
end
|
122
|
+
else
|
123
|
+
puts "(no hashtags during the last minute)".center(width)
|
124
|
+
end
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
## Print a table about hashtags popularity during the
|
129
|
+
## various time periods
|
130
|
+
puts "\n\n"
|
131
|
+
puts "Popular Hashtags".center(width)
|
132
|
+
puts '-'*width
|
133
|
+
columnize "Last Minute", "Prev Minute","Last Hour",
|
134
|
+
"Prev Hour", "Today", "Yesterday"
|
135
|
+
puts '-'*width
|
136
|
+
last = last.sort_by { |k, v| v }.reverse.slice(0,15)
|
137
|
+
previous = previous.sort_by { |k, v| v }.reverse.slice(0,15)
|
138
|
+
last_hour = last_hour.sort_by { |k, v| v }.reverse.slice(0,15)
|
139
|
+
previous_hour = previous_hour.sort_by { |k, v| v }.reverse.slice(0,15)
|
140
|
+
today = today.sort_by { |k, v| v }.reverse.slice(0,15)
|
141
|
+
yesterday = yesterday.sort_by { |k, v| v }.reverse.slice(0,15)
|
142
|
+
|
143
|
+
columnize_arrays last, previous, last_hour,
|
144
|
+
previous_hour, today, yesterday
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
refresh_terminal
|
149
|
+
|
150
|
+
## Start Event Machine
|
151
|
+
EM.run do
|
152
|
+
|
153
|
+
## Periodic execution
|
154
|
+
EM::PeriodicTimer.new(10){
|
155
|
+
refresh_terminal
|
156
|
+
}
|
157
|
+
end # end EM
|