mines 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/.gitignore +7 -0
  2. data/.yardopts +6 -0
  3. data/Gemfile +5 -0
  4. data/Gemfile.lock +37 -0
  5. data/README.md +25 -0
  6. data/Rakefile +8 -0
  7. data/bin/mines +8 -8
  8. data/config/application.rb +11 -0
  9. data/config/help.rb +13 -0
  10. data/examples/README.md +4 -0
  11. data/examples/hashtags/Gemfile +24 -0
  12. data/examples/hashtags/Gemfile.lock +81 -0
  13. data/examples/hashtags/README.md +22 -0
  14. data/examples/hashtags/config/application.rb +30 -0
  15. data/examples/hashtags/miners/metrics.rb +157 -0
  16. data/examples/hashtags/miners/process.rb +46 -0
  17. data/examples/hashtags/miners/publicstream.rb +108 -0
  18. data/examples/hashtags/spec/metrics_spec.rb +23 -0
  19. data/examples/hashtags/spec/process_spec.rb +23 -0
  20. data/examples/hashtags/spec/publicstream_spec.rb +23 -0
  21. data/lib/generators/README.md +13 -0
  22. data/lib/generators/application.rb +3 -2
  23. data/lib/generators/miner.rb +10 -2
  24. data/lib/generators/templates/Gemfile.erb +24 -0
  25. data/lib/generators/templates/README.md +24 -0
  26. data/lib/generators/templates/application_config.erb +11 -0
  27. data/lib/generators/templates/metrics_miner.erb +28 -0
  28. data/lib/generators/templates/miner_spec.erb +23 -0
  29. data/lib/generators/templates/network_miner.erb +32 -0
  30. data/lib/generators/templates/process_miner.erb +26 -1
  31. data/lib/generators/templates/twitter_miner.erb +111 -0
  32. data/lib/logging.rb +10 -3
  33. data/mines.gemspec +33 -0
  34. data/spec/logging_spec.rb +19 -0
  35. data/spec/mines_spec.rb +19 -0
  36. data/spec/redis_store_spec.rb +64 -0
  37. data/spec/utilities_spec.rb +26 -0
  38. metadata +45 -11
@@ -0,0 +1,46 @@
1
+ #!/bin/ruby
2
+ # coding: utf-8
3
+
4
+ ## Gem dependencies are managed by Bundler inside project's Gemfile
5
+ require 'bundler'
6
+ Bundler.require :process
7
+
8
+ @log.info "Process miner pid: #{Process.pid}"
9
+
10
+ ## Connect to an Object Queue
11
+ ## elements are pushed to this queue by the network miner
12
+ queue = ObjectQueue.new "publicstream"
13
+
14
+ counter = Counter.new "original"
15
+
16
+ channel = MessageChannel.new("web") # example channel
17
+
18
+ ## Create a timeseries metrics store to
19
+ ## save the occurrence rate for each tag for each minute
20
+ ts = TimeSeries.new("hashtags")
21
+
22
+ ## Create a timeseries metrics store to
23
+ ## save the total number of tweets each minute
24
+ tweets_rate = TimeSeries.new("tweets_rate")
25
+
26
+ ## Start the process loop
27
+ loop {
28
+ ## Get an element from the queue
29
+ status = queue.pop
30
+ tweets_rate.push "all"
31
+
32
+ ## Filter out Retweets and Replies
33
+ unless status.text =~ /^RT/ or
34
+ status.retweet_count > 0 or
35
+ status.text =~ /^@/
36
+
37
+ print_tweet(status.user.screen_name, status.text)
38
+
39
+ status.hashtags.each do |hashtag|
40
+ ts.push (hashtag.text)
41
+ end
42
+ counter.incr
43
+ channel.publish({:user => status.user.screen_name, :text => status.text}.to_json)
44
+ #web.publish({:user => status.user.screen_name, :text => status.text.gsub(/(https?:\/\/\S+)/, '<a href="\1" target="_blank">\1</a> ').to_s}.to_json)
45
+ end
46
+ } # end loop
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ ## Gem dependencies are managed by Bundler inside project's Gemfile
5
+ require 'bundler'
6
+ Bundler.require :network
7
+
8
+ ## Create an Object Queue
9
+ queue = ObjectQueue.new "publicstream"
10
+
11
+ ## Provide your Twitter Streaming API keys and credentials
12
+ ##
13
+ ## example:
14
+ # TweetStream.configure do |config|
15
+ # config.consumer_key = "mZ0gsfwgA"
16
+ # config.consumer_secret = "nqwQqELrvdb6C"
17
+ # config.oauth_token = "OfIUtfTmP6NQuwWKlH"
18
+ # config.oauth_token_secret = 'u4bFMpQxfgwZl'
19
+ # config.auth_method = :oauth
20
+ # end
21
+
22
+ ## It is recommended to provide your credentials
23
+ ## in a different file outside version control system
24
+ ## to avoid publishing them.
25
+ ## example:
26
+ require_relative '../../twitter_credentials'
27
+
28
+ ## Initialize TweetStream Client
29
+ client = TweetStream::Client.new
30
+
31
+ ## Print custom messages when client receives special event messages
32
+ ## See also: https://dev.twitter.com/docs/streaming-apis/messages
33
+ client.on_error do |message|
34
+ msg = "\nError message: #{message}\n"+
35
+ "An HTTP error is encountered in the processing of the stream. "+
36
+ "Note that TweetStream will automatically try to reconnect, "+
37
+ "this is for reference only. Don't panic!"
38
+ puts red msg
39
+ @lod.error msg
40
+ end
41
+ client.on_unauthorized do
42
+ msg = "\nUnauthorized.\n"+
43
+ "An HTTP status 401 is encountered while connecting to Twitter. "+
44
+ "This could happen when system clock drift has occured."
45
+ puts red msg
46
+ @log.error msg
47
+ end
48
+ client.on_inited do
49
+ msg = "Connection Established"
50
+ puts green msg
51
+ @log.info msg
52
+ end
53
+ client.on_no_data_received do
54
+ msg = "\nNo data was received from the server and a stall occurred. "+
55
+ "Twitter defines this to be 90 seconds."
56
+ puts red msg
57
+ @log.info msg
58
+ end
59
+ client.on_delete do |status_id, user_id|
60
+ msg = "\nStatus deletion notice: user: #{user_id} status: #{status_id}"
61
+ puts yellow msg
62
+ @log.info msg
63
+ end
64
+ client.on_reconnect do |timeout, retries|
65
+ msg = "Reconnect Timeout: #{timeout} retries: #{retries}"
66
+ puts red msg
67
+ @log.info msg
68
+ end
69
+ client.on_limit do |skip_count|
70
+ msg = "\nA rate limit notice is received from the Twitter stream.\n"+
71
+ "Discarded Count: #{skip_count}"
72
+ puts yellow msg
73
+ @log.info msg
74
+ end
75
+ client.on_enhance_your_calm do
76
+ msg = "\nEnhance your calm"
77
+ puts yellow msg
78
+ @log.info msg
79
+ end
80
+
81
+ ## Make a call to the statuses/filter method of the Streaming API,
82
+ ## you may provide :follow, :track or :locations.
83
+ ##
84
+ ## :track Phrases of keywords to track
85
+ ## :follow A list of user IDs, indicating the
86
+ ## users to return statuses for in the stream.
87
+ ## :locations Specifies a set of bounding boxes to track
88
+ ## e.g. :locations => [-122.75,36.8,-121.75,37.8,-74,40,-73,41]
89
+ ##
90
+ client.filter(:track => %w( μια το να και το του της τα τι
91
+ για δεν στο στον στην με από απο
92
+ θα τις οι μου) ) do |status|
93
+ #client.filter(:track => %w( hi ) ) do |status|
94
+ ## status is an object of class Tweet
95
+
96
+ ## Print the user's screen name
97
+ # puts blue status.user.screen_name
98
+
99
+ ## Print the tweet's text
100
+ # puts green status.text
101
+
102
+ ## Print a blue dot to indicate a tweet has been received
103
+ print blue "."
104
+
105
+ ## Push the status to the object queue
106
+ ## (the object queue will marshal the object)
107
+ queue.push(status)
108
+ end
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ require "minitest/autorun"
5
+ require_relative '../miners/metrics.rb'
6
+
7
+ describe "metrics Miner" do
8
+
9
+ before do
10
+ end
11
+
12
+ after do
13
+ end
14
+
15
+ it "metrics does this" do
16
+ true.must_equal true
17
+ end
18
+
19
+ it "metrics does that" do
20
+ true.must_equal true
21
+ end
22
+
23
+ end
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ require "minitest/autorun"
5
+ require_relative '../miners/process.rb'
6
+
7
+ describe "process Miner" do
8
+
9
+ before do
10
+ end
11
+
12
+ after do
13
+ end
14
+
15
+ it "process does this" do
16
+ true.must_equal true
17
+ end
18
+
19
+ it "process does that" do
20
+ true.must_equal true
21
+ end
22
+
23
+ end
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ require "minitest/autorun"
5
+ require_relative '../miners/publicstream.rb'
6
+
7
+ describe "publicstream Miner" do
8
+
9
+ before do
10
+ end
11
+
12
+ after do
13
+ end
14
+
15
+ it "publicstream does this" do
16
+ true.must_equal true
17
+ end
18
+
19
+ it "publicstream does that" do
20
+ true.must_equal true
21
+ end
22
+
23
+ end
@@ -0,0 +1,13 @@
1
+ ### Generators
2
+
3
+ Generators are used to create directories, files with boilerplate code etc
4
+
5
+
6
+ #### Generators
7
+
8
+ * **Application**
9
+ creates the default file structure and prints appropriate messages
10
+
11
+ * **Miner**
12
+ creates specific miners based on templates
13
+
@@ -14,7 +14,7 @@ module Mines::Generator
14
14
  # Accepts as an argument the name of the new application. A new directory with this name will be created
15
15
  class Application < Thor::Group
16
16
  include Thor::Actions
17
- include Mines::Logging
17
+ #include Mines::Logging
18
18
 
19
19
  # Define arguments and options
20
20
  argument :name
@@ -29,13 +29,14 @@ module Mines::Generator
29
29
  # - Files: TODO
30
30
  # also prints appropriate messages
31
31
  def create_directory_structure
32
- log.info "Creating directory structure in dir: " + name
32
+ #@log.info "Creating directory structure in dir: " + name
33
33
  empty_directory name
34
34
  empty_directory name + "/miners"
35
35
  empty_directory name + "/lib"
36
36
  empty_directory name + "/log"
37
37
  empty_directory name + "/config"
38
38
  template "templates/application_config.erb", "#{name}/config/application.rb"
39
+ template "templates/Gemfile.erb", "#{name}/Gemfile"
39
40
  puts "Application created successfully! ".green
40
41
  print "Type "
41
42
  print "'cd #{name}' ".yellow
@@ -13,7 +13,7 @@ module Mines::Generator
13
13
  # - Metrics
14
14
  class Miner < Thor::Group
15
15
  include Thor::Actions
16
- include Mines::Logging
16
+ #include Mines::Logging
17
17
 
18
18
  # Define arguments and options
19
19
  argument :type, :desc => "The type of the miner, [Network,Process,Metrics]", :required => true
@@ -32,7 +32,15 @@ module Mines::Generator
32
32
  #puts options[:evented]
33
33
  template "templates/#{type}_miner.erb", "miners/#{name}.rb"
34
34
  end
35
-
35
+
36
+ # Use the miner spec template
37
+ # and put the generated file in spec directory
38
+ def create_miner_spec
39
+ puts "Copy miner spec template"
40
+ #puts options[:evented]
41
+ template "templates/miner_spec.erb", "spec/#{name}_spec.rb"
42
+ end
43
+
36
44
  end # class
37
45
 
38
46
  end # module
@@ -0,0 +1,24 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'minitest'
4
+
5
+ group :network do
6
+ gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
7
+ gem 'tweetstream' # Twitter Streaming API library
8
+ end
9
+
10
+ group :process do
11
+ gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
12
+ gem 'json'
13
+ gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
14
+ gem 'twitter'
15
+ gem 'awesome_print'
16
+ end
17
+
18
+ group :metrics do
19
+ gem 'mines' , :git => 'git://github.com/panayiotis/mines.git'
20
+ gem 'time_series' , :git => 'git://github.com/panayiotis/time_series.git'
21
+ gem 'json'
22
+ gem 'eventmachine'
23
+ gem 'awesome_print'
24
+ end
@@ -0,0 +1,24 @@
1
+ ### Templates
2
+
3
+ Files with boilerplate code are created based on these templates
4
+
5
+
6
+ #### Templates
7
+
8
+ * **network miner**
9
+ Boilerplate code for the network miner. Will be placed in miners directory.
10
+
11
+ * **process miner**
12
+ Boilerplate code for the process miner. Will be placed in miners directory.
13
+
14
+ * **twitter miner**
15
+ Boilerplate code for the twitter miner. Will be placed in miners directory.
16
+
17
+ * **metrics miner**
18
+ Boilerplate code for the metrics miner. Will be placed in miners directory.
19
+
20
+ * **application config**
21
+ Application configuration. Will be placed in config directory.
22
+
23
+ * **miner spec**
24
+ Miner spec. Will be created for each miner and placed in spec directory.
@@ -6,6 +6,8 @@ Application.config do
6
6
  parameter :var1
7
7
  parameter :var2
8
8
  parameter :var3
9
+ parameter :logname
10
+ parameter :app_prefix
9
11
  end
10
12
 
11
13
  ## Change their values
@@ -13,7 +15,16 @@ Application.config do
13
15
  var1 "value"
14
16
  var2 ['one','two','three']
15
17
  var3 { :one=>1, :two=>2 }
18
+ logname "<%= name %>.log"
19
+ app_prefix "<%= name %>"
16
20
  end
17
21
 
22
+
23
+ ## Initializer
24
+ ## e.g. initialize a global variable
25
+ ## $app_prefix variable needs to be global
26
+ $app_prefix = Application.app_prefix
27
+
28
+
18
29
  ## Usage example
19
30
  # puts Application.var1
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ ## Gem dependencies are managed by Bundler inside project's Gemfile
5
+ require 'bundler'
6
+ Bundler.require :network
7
+
8
+ log_name :<%= name %>
9
+
10
+ ## Connect to a TimeSeries metrics store
11
+ ## Data is being pushed constantly to the timeseries by
12
+ ## the process miner
13
+ @ts = TimeSeries.new("<%= name %>")
14
+
15
+
16
+ ## Start Event Machine
17
+ EM.run do
18
+
19
+ ## Periodic execution
20
+ EM::PeriodicTimer.new(13){
21
+ print blue '.'
22
+ }
23
+
24
+ ## Periodic execution
25
+ EM::PeriodicTimer.new(1){
26
+ print red '.'
27
+ }
28
+ end # end EM
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ require "minitest/autorun"
5
+ require_relative '../miners/<%= name %>.rb'
6
+
7
+ describe "<%= name %> Miner" do
8
+
9
+ before do
10
+ end
11
+
12
+ after do
13
+ end
14
+
15
+ it "<%= name %> does this" do
16
+ true.must_equal true
17
+ end
18
+
19
+ it "<%= name %> does that" do
20
+ true.must_equal true
21
+ end
22
+
23
+ end
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ ## Gem dependencies are managed by Bundler inside project's Gemfile
5
+ require 'bundler'
6
+ Bundler.require :network
7
+
8
+ #log_name :<%= name %>
9
+
10
+ ## Use event Machine for non-blocking IO
11
+ ## or Threads for blocking IO
12
+
13
+ ## Create Thread example
14
+ Thread.new { loop { print green '.'; `sleep 1` } }
15
+
16
+ # Start Event Machine
17
+ EM.run do
18
+
19
+ ## Periodic execution
20
+ EM::PeriodicTimer.new(1){
21
+ print blue '.'
22
+ ## Avoid blocking commands inside the Reactor.
23
+ ## This 'sleep' is blocking the other
24
+ ## timer too. Use Threads instead.
25
+ `sleep 1`
26
+ }
27
+
28
+ ## Periodic execution
29
+ EM::PeriodicTimer.new(0.5){
30
+ print red '.'
31
+ }
32
+ end # end EM