url_tracker 1.0 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,109 @@
1
+ module UrlTracker
2
+
3
# Command-line client for the tracker server. It talks to the server over a
# Unix socket (see SocketCommunication) using plain-text requests: track a new
# URL, list the URLs currently tracked, stop tracking a URL, or shut down.
class Client
  include SocketCommunication

  require 'optparse'
  require 'ostruct'

  # Connects to the server socket at +socket_file+.
  #
  # If the socket file does not exist, or it exists but nothing accepts
  # connections on it (e.g. a stale file left behind by a dead server), a hint
  # is printed to STDERR and the process exits with status 1.
  def initialize(socket_file = '/tmp/_ut.sock')
    connect(socket_file)
  rescue Errno::ENOENT, Errno::ECONNREFUSED
    STDERR.puts 'Connection error. Is the server running?'
    exit(1)
  end

  # Sends a message to the server asking to track a new URL. Format
  # of the message:
  #
  #   "track {{URL}}"
  #
  # Returns true when the server answers 'ok'.
  def track(url)
    write("track #{url}")
    next_message == 'ok'
  end

  # Asks the server for all the URLs currently being tracked. Expects a
  # string back, with URLs separated by commas, and returns them as an array.
  def list
    write('list')
    next_message.split(',')
  end

  # Tells the server to stop tracking the given URL. Returns true if the
  # server answers 'ok'.
  def release(url)
    write("release #{url}")
    next_message == 'ok'
  end

  # Sends the 'shutdown' message to the server. No reply is read.
  def shutdown
    write('shutdown')
  end

  # Calls one of the methods above according to the options passed and
  # returns that method's result (nil when no action was requested).
  # Available options:
  #
  #   -t, --track URL     #=> Starts tracking URL
  #   -l, --list          #=> List currently tracked URLs
  #   -r, --release URL   #=> Releases URL, not tracking it any more
  #
  # +params+ can also be a hash (e.g. { action: :track, url: '...' }), in
  # which case it is considered already parsed and is used as is.
  def run(params)
    options = parse(params)

    case options.action
    when :track   then track(options.url)
    when :list    then list
    when :release then release(options.url)
    end
  end

  private

  # Turns an argv-style array into an OpenStruct with +action+ and +url+.
  # A Hash is wrapped in an OpenStruct untouched. Consumes the recognised
  # switches from +argv+ (OptionParser#parse!).
  def parse(argv)
    return OpenStruct.new(argv) if argv.kind_of?(Hash)

    options = OpenStruct.new
    options.action = :nothing

    parser = OptionParser.new do |opts|
      opts.banner = 'Usage: ut [options]'
      opts.separator ''
      opts.separator 'Available options:'

      opts.on('-t', '--track URL', 'Start tracking URL') do |url|
        options.url = prepare_url(url)
        options.action = :track
      end

      opts.on('-l', '--list', 'List currently tracked URLs') do
        options.action = :list
      end

      opts.on('-r', '--release URL', 'Release URL, not tracking it any more') do |url|
        # Normalised exactly like --track, so that the same command-line
        # argument refers to the same URL for both operations.
        options.url = prepare_url(url)
        options.action = :release
      end

      opts.on_tail('-h', '--help', 'Show this message') do
        puts opts
        exit(0)
      end
    end

    parser.parse!(argv)

    options
  end

  # Returns +url+ with an 'http://' scheme prepended when it carries neither
  # 'http://' nor 'https://'. The argument itself is never modified.
  def prepare_url(url)
    return url if url.start_with?('http://', 'https://')

    "http://#{url}"
  end

end
108
+
109
+ end
@@ -0,0 +1,69 @@
1
+ require 'net/http'
2
+
3
+ module UrlTracker
4
+
5
# Class representing a single web page to be tracked. It is capable of fetching
# the page's content and verifying whether it changed since the last fetch.
class Page
  attr_reader :uri

  # Creates a new instance of UrlTracker::Page. The first argument is the URI that
  # corresponds to the page; it is copied and fetched lazily. The second parameter
  # is the object responsible for fetching the page itself. It must respond
  # to the +get+ method with the given +uri+ parameter and return a string
  # with the page contents; this parameter defaults to +Net::HTTP+, so by
  # default you should pass +uri+ as an instance of +URI::Generic+.
  def initialize(uri, page_fetcher = Net::HTTP)
    @uri = uri.dup
    @page_fetcher = page_fetcher
  end

  # Returns a string containing the page content. If not yet fetched, this method
  # will fetch the page for you; afterwards the cached copy is returned.
  def content
    @content ||= fetch
  end

  # Returns a string containing the page content, always fetching the page
  # again and replacing the cached copy.
  def content!
    @content = fetch
  end

  # Fetches the page and tells whether its content differs from the cached
  # copy. The very first fetch has nothing to compare against and therefore
  # reports false. The freshly fetched content becomes the new cached copy.
  def changed?
    previous = @content
    @content = fetch
    previous ? @content != previous : false
  end

  # Two pages are considered the same if they have the same URI. Right, that
  # might not be true if the content is different (which shouldn't happen if
  # you are building a RESTful service), but we will just ignore that and
  # pretend we live in a better world.
  #
  # Anything that is not a Page is simply not equal (no exception is raised).
  def eql?(other)
    other.is_a?(Page) && @uri.eql?(other.uri)
  end

  # Same rule as #eql?, using the URI's own == comparison.
  def ==(other)
    other.is_a?(Page) && @uri == other.uri
  end

  # Hash of the URI, so that equal pages land on the same Hash/Set slot.
  def hash
    @uri.hash
  end

  private

  # Asks the fetcher for the current content of the page.
  def fetch
    @page_fetcher.get(@uri)
  end

end
68
+
69
+ end
@@ -0,0 +1,129 @@
1
+ module UrlTracker
2
+
3
+ require 'eventmachine'
4
+
5
# Small class wrapping EventMachine calls to programmatically
# execute code blocks at fixed intervals.
class Periodic

  # Seconds per supported time unit.
  # maybe consider :day in the future
  TIME_UNITS = {
    minute:  60,
    minutes: 60,
    hour:    60*60,
    hours:   60*60
  }.freeze

  # Creates a new instance of UrlTracker::Periodic and starts the event loop.
  def initialize
    @named_tasks = {}
    start_event_loop
  end

  # Register a new task to be executed in a specified amount of time.
  # Returns the interval in seconds. Raises RuntimeError for an invalid
  # period or an unknown time unit.
  # Examples:
  #
  #   p = UrlTracker::Periodic.new
  #   p.every(:minute)     { do_something }  #=> executed every minute
  #   p.every(2, :minutes) { do_something }  #=> executed every 2 minutes
  #   p.every(4, :hours)   { do_something }  #=> executed every 4 hours
  def every(*args, &block)
    interval = case args.first
               when Integer then args[0]*seconds_for(args[1])
               when Symbol  then seconds_for(args[0])
               else raise "Invalid period #{args[0].inspect}"
               end

    task = { every: interval, task: block }
    task[:name] = @name if named_task?

    schedule_task(task)

    interval
  ensure
    # The name set by #task applies to this call only. Clear it even when the
    # call fails, otherwise the next unnamed task would inherit it.
    @name = nil
  end

  # Returns the names of the named tasks registered. Names are stored as
  # strings (see #task).
  # Example
  #
  #   p = UrlTracker::Periodic.new
  #   p.task(:foo).every(:minute) { do_something }
  #   p.task(:bar).every(2, :minutes) { do_other_thing }
  #   p.named_tasks #=> ["foo", "bar"]
  def named_tasks
    task_names
  end

  # Removes a task named +name+, so that it will no longer run. +name+ may be
  # given as a symbol or a string, just like in #task. Raises RuntimeError
  # when no such task is registered.
  def remove_task(name)
    key = name.to_s
    raise "Unregistered task #{name.inspect}" unless @named_tasks.key?(key)

    unschedule_task(key)
  end

  # Restarts the event loop
  def restart
    stop if running?
    start_event_loop
  end

  # Checks if the thread running the event loop is alive
  def running?
    @event_thread.alive?
  end

  # Stop all scheduled tasks by terminating the event loop thread
  def stop
    @event_thread.terminate
    @event_thread.join
  end

  # Used for creating named tasks or, in other words, tasks that can be removed
  # later using #remove_task. Returns self so that #every can be chained.
  def task(name)
    @name = name.to_s
    self
  end

  private

  # True while a name set by #task is waiting for the next #every call.
  def named_task?
    !@name.nil?
  end

  # Number of seconds in +time_unit+ (one of the TIME_UNITS keys).
  def seconds_for(time_unit)
    raise "Unkown time unit #{time_unit.inspect}" unless TIME_UNITS.include?(time_unit)
    TIME_UNITS[time_unit]
  end

  # +task+ is expected to be in the format:
  #
  #   { every: 60, task: #<Proc:0x...> }
  #
  # for a task to be run every minute, for example. The timer of a named task
  # is remembered so that it can be cancelled later.
  def schedule_task(t)
    periodic_timer = EM.add_periodic_timer(t[:every], &t[:task])
    @named_tasks[@name] = periodic_timer if named_task?
  end

  def start_event_loop
    # start the event loop in a separate thread
    @event_thread = Thread.new { EM.run }

    # Wait until the reactor thread is sleeping (or dead), yielding the CPU
    # to it instead of spinning in a hot loop.
    Thread.pass until @event_thread.stop?
  end

  def task_names
    @named_tasks.keys
  end

  # Forgets the timer registered under +name+ and cancels it.
  def unschedule_task(name)
    EM.cancel_timer(@named_tasks.delete(name))
  end

end
128
+
129
+ end
@@ -0,0 +1,150 @@
1
+ module UrlTracker
2
+
3
+ require 'optparse'
4
+ require 'ostruct'
5
+ require 'logger'
6
+ require 'pathname'
7
+
8
# Class that waits for messages on a given socket and responds back to clients.
# It can track URLs, list currently tracked URLs and stop tracking.
# Socket handling comes from SocketCommunication; the tracking itself is
# delegated to the UrlTracker module functions.
class Server
  include SocketCommunication

  # Initializes a new server which logs its activities using the passed logger object.
  # Defaults to Ruby's Logger class writing to STDERR. Log level defaults to
  # +Logger::INFO+ and you can override it by setting the URL_TRACKER_DEBUG
  # environment variable (see #level_from_env for the accepted values).
  def initialize(logger=Logger.new(STDERR))
    # The logger is assigned before the signal handlers are installed, since
    # the handlers use it.
    @logger = logger
    @logger.level = level_from_env || Logger::INFO
    setup_signals
  end

  # Main server method. Loops for as long as connections are accepted, reading
  # one message per connection and taking the appropriate action according to
  # what is received, such as a new URL to track. A non-nil result of the
  # action is written back to the client.
  #
  # Any StandardError raised while serving is logged and ends the loop.
  #
  # NOTE(review): Client#shutdown sends 'shutdown', which matches none of the
  # commands below and is therefore ignored here -- confirm whether the server
  # is supposed to handle it.
  def loop_forever
    while wait_for_connection
      @logger.info("New client connected")
      command, *arguments = next_message.split
      @logger.debug "#{command} received"
      response = case command
                 when /^track$/i   then track(arguments.first)
                 when /^list$/i    then list
                 when /^release$/i then release(arguments.first)
                 end

      write(response) unless response.nil?
    end
  rescue => e
    @logger.error("An error occurred when waiting for new connections!\n\t#{e.inspect}\n\t#{e.backtrace.join("\n\t")}")
  end

  # Track an URL (delegates to UrlTracker.track_uri)
  def track(uri)
    @logger.info("Tracking URL #{uri}")
    UrlTracker.track_uri(uri)
  end

  # List tracked URLs (delegates to UrlTracker.list_all)
  def list
    UrlTracker.list_all
  end

  # Release an URL (delegates to UrlTracker.release_uri)
  def release(uri)
    @logger.info("Releasing URL #{uri}")
    UrlTracker.release_uri(uri)
  end

  # Runs the server, according to the argv options passed.
  # Possible options:
  #
  #   -s, --socket FILE   #=> Uses FILE as socket file for communication
  #   -f, --fork          #=> Forks and works as a daemon
  #
  # +params+ can also be a hash, containing the parsed information to the server
  # (keys +socket_file+ and +fork+).
  def run(params)
    @logger.info "UrlTracker #{UrlTracker::VERSION} starting. Log level is #{@logger.level.inspect}."

    options = parse(params)
    @socket_file = options.socket_file
    @pid = nil

    bind(@socket_file)

    @logger.info "Server starting at socket #{Pathname.new(@socket_file).realpath.to_s}"

    if options.fork
      @pid = fork { loop_forever }
      @logger.info "Forking to background. Child pid #{@pid}"
    else
      loop_forever
    end
  end

  # Stops the current server: signals the forked child when there is one,
  # otherwise closes the socket of this process.
  def stop
    @pid ? Process.kill('TERM', @pid) : close_connection
  end

  private

  # Turns an argv-style array into an OpenStruct with +socket_file+ and
  # +fork+. A Hash is wrapped in an OpenStruct untouched. Consumes the
  # recognised switches from +argv+ (OptionParser#parse!).
  def parse(argv)
    return OpenStruct.new(argv) if argv.kind_of?(Hash)

    options = OpenStruct.new
    options.socket_file = '/tmp/_ut.sock'
    options.fork = false

    parser = OptionParser.new do |opts|
      opts.banner = 'Usage: utd [options]'
      opts.separator ''
      opts.separator 'Available options:'

      opts.on('-s', '--socket FILE', 'Uses FILE as a socket. Defaults to /tmp/_ut.sock') do |socket_file|
        options.socket_file = socket_file
      end

      opts.on('-f', '--fork', 'Forks and works as a daemon') do |f|
        options.fork = f
      end

      opts.on_tail('-h', '--help', 'Show this message') do
        puts opts
        exit(0)
      end
    end

    parser.parse!(argv)

    options
  end

  # Installs handlers for INT, TERM and QUIT that stop the server, log a
  # farewell message and exit with status 0.
  def setup_signals
    %w[INT TERM QUIT].each do |signal|
      Signal.trap(signal) do
        stop
        # Logger synchronizes its writes, and Ruby does not allow taking that
        # lock inside a trap handler; log from a short-lived thread instead so
        # the message is not lost.
        Thread.new { @logger.info "SIG#{signal} received. Bye." }.join
        exit(0)
      end
    end
  end

  # Retrieves log level from the URL_TRACKER_DEBUG environment variable.
  # Returns nil when the variable is unset or has an unrecognised value.
  def level_from_env
    case ENV['URL_TRACKER_DEBUG']
    when 'debug'   then Logger::DEBUG
    when 'error'   then Logger::ERROR
    when 'fatal'   then Logger::FATAL
    when 'info'    then Logger::INFO
    when 'warning' then Logger::WARN
    else nil
    end
  end

end
149
+
150
+ end
@@ -0,0 +1,75 @@
1
+ module UrlTracker
2
+
3
+ require 'socket'
4
+
5
# Implements communication via Unix sockets. An object mixing this module in
# acts either as a client (#connect) or as a server (#bind followed by
# #wait_for_connection), never both.
module SocketCommunication
  attr_writer :path # path to the socket file

  # Raised when #connect or #bind is called on an object that already has a socket.
  InvalidSocketError = Class.new(StandardError)

  # Messages received cannot be longer than 1024 bytes
  MAX_MESSAGE_LENGTH = 1024

  # Max connections to be queued before accept
  MAX_CONN_QUEUE = 10

  # Connects to the Unix socket at +path+. Returns true if the connection was
  # successful. Otherwise an exception is thrown. This method cannot be called
  # if +bind+ (or +connect+) was already called; doing so raises
  # InvalidSocketError.
  def connect(path)
    raise_socket_error_if { defined? @socket }
    @socket = Socket.new(:UNIX, :SOCK_STREAM, 0)
    @socket.connect addrinfo_for(path)
    true
  end

  # Binds the given path, creating a Unix socket. As with connect, you cannot use
  # this method if +connect+ (or +bind+) was already called; doing so raises
  # InvalidSocketError. After this method is called, the socket will be
  # listening for connections. Returns true.
  def bind(path)
    raise_socket_error_if { defined? @socket }
    @socket_file = path
    @socket = Socket.new(:UNIX, :SOCK_STREAM, 0)
    @socket.bind addrinfo_for(@socket_file)
    @socket.listen(MAX_CONN_QUEUE)
    true
  end

  # Waits for a connection on the bound socket. Blocks until a client
  # connects; that client becomes the peer used by #write and #next_message.
  def wait_for_connection
    @current_client = @socket.accept.first
  end

  # Writes to the socket, returning the number of bytes sent. This method can
  # only be called after +connect+ or +wait_for_connection+, otherwise you
  # will get an exception.
  def write(message)
    socket = (defined? @current_client) ? @current_client : @socket
    socket.send(message, 0)
  end

  # Waits for a message of at most MAX_MESSAGE_LENGTH bytes. Blocks until it
  # is received.
  def next_message
    socket = (defined? @current_client) ? @current_client : @socket
    socket.recvfrom(MAX_MESSAGE_LENGTH).first
  end

  # Closes the socket if it is open and, for a bound socket, removes the
  # socket file from disk.
  def close_connection
    defined?(@socket) && !@socket.closed? && @socket.close
    # File.exist? rather than File.exists?: the latter no longer exists in
    # current Ruby versions.
    File.unlink(@socket_file) if defined?(@socket_file) && File.exist?(@socket_file)
  end

  private

  # Raises InvalidSocketError when the given block returns a truthy value.
  def raise_socket_error_if(&block)
    raise InvalidSocketError if block.call
  end

  # Unix-domain socket address for +path+.
  def addrinfo_for(path)
    Addrinfo.unix(path)
  end

end
74
+
75
+ end
@@ -0,0 +1,5 @@
1
module UrlTracker

  # Version of the url_tracker gem. Frozen so that it cannot be modified by accident.
  VERSION = '1.1'.freeze

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_tracker
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: '1.1'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -68,6 +68,12 @@ extensions: []
68
68
  extra_rdoc_files: []
69
69
  files:
70
70
  - lib/url_tracker.rb
71
+ - lib/url_tracker/socket_communication.rb
72
+ - lib/url_tracker/server.rb
73
+ - lib/url_tracker/periodic.rb
74
+ - lib/url_tracker/client.rb
75
+ - lib/url_tracker/page.rb
76
+ - lib/url_tracker/version.rb
71
77
  - bin/ut
72
78
  - bin/utd
73
79
  - test/test_server.rb
@@ -91,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
91
97
  version: '0'
92
98
  segments:
93
99
  - 0
94
- hash: -1259474569283625211
100
+ hash: -1434435919243475892
95
101
  required_rubygems_version: !ruby/object:Gem::Requirement
96
102
  none: false
97
103
  requirements:
@@ -100,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
106
  version: '0'
101
107
  segments:
102
108
  - 0
103
- hash: -1259474569283625211
109
+ hash: -1434435919243475892
104
110
  requirements: []
105
111
  rubyforge_project:
106
112
  rubygems_version: 1.8.23