url_tracker 1.0 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/url_tracker/client.rb +109 -0
- data/lib/url_tracker/page.rb +69 -0
- data/lib/url_tracker/periodic.rb +129 -0
- data/lib/url_tracker/server.rb +150 -0
- data/lib/url_tracker/socket_communication.rb +75 -0
- data/lib/url_tracker/version.rb +5 -0
- metadata +9 -3
@@ -0,0 +1,109 @@
|
|
1
|
+
module UrlTracker
|
2
|
+
|
3
|
+
# Class that deals with requesting actions from the server, such as
|
4
|
+
# track a new URL, list all currently tracked links, stop tracking something, etc.
|
5
|
+
class Client
  include SocketCommunication

  require 'optparse'
  require 'ostruct'

  # Matches URLs that already carry an explicit http:// or https:// scheme.
  URL_SCHEME = %r{\Ahttps?://}i

  # Connects to the server listening on +socket_file+. If the connection
  # cannot be established (the socket file is missing, or it exists but
  # nobody is listening on it), prints an error to STDERR and exits with
  # status 1.
  def initialize(socket_file = '/tmp/_ut.sock')
    connect(socket_file)
  rescue Errno::ENOENT, Errno::ECONNREFUSED
    STDERR.puts 'Connection error. Is the server running?'
    exit(1)
  end

  # Sends a message to the server asking to track a new URL. Format
  # of the message:
  #
  #   "track {{URL}}"
  #
  # Returns true if the server answered "ok".
  def track(url)
    write("track #{url}")
    next_message == 'ok'
  end

  # Asks the server for all the URLs currently being tracked. Expects a
  # string back, with URLs separated by commas, and returns them as an array.
  def list
    write('list')
    next_message.split(',')
  end

  # Tells the server to stop tracking the given URL. Returns true if the
  # server answered "ok".
  def release(url)
    write("release #{url}")
    next_message == 'ok'
  end

  # Tells the server to shutdown. No answer is read back.
  def shutdown
    write('shutdown')
  end

  # Calls one of the methods above according to the options passed and
  # returns that method's result (nil when no action was selected).
  # Available options:
  #
  #   -t, --track URL    #=> Starts tracking URL
  #   -l, --list         #=> List currently tracked URLs
  #   -r, --release URL  #=> Releases URL, not tracking it any more
  #
  # +params+ can also be a hash, in which case it will be considered already
  # parsed; its +action+ may be :track, :list, :release or :shutdown.
  def run(params)
    options = parse(params)

    case options.action
    when :track    then track(options.url)
    when :list     then list
    when :release  then release(options.url)
    when :shutdown then shutdown
    end
  end

  private

  # Turns +argv+ into an OpenStruct responding to +action+ and +url+.
  # A hash is wrapped as is; an array of command line arguments is parsed
  # (and consumed) by OptionParser.
  def parse(argv)
    return OpenStruct.new(argv) if argv.kind_of?(Hash)

    options = OpenStruct.new
    options.action = :nothing

    parser = OptionParser.new do |opts|
      opts.banner = 'Usage: ut [options]'
      opts.separator ''
      opts.separator 'Available options:'

      opts.on('-t', '--track URL', 'Start tracking URL') do |url|
        options.url = prepare_url(url)
        options.action = :track
      end

      opts.on('-l', '--list', 'List currently tracked URLs') do
        options.action = :list
      end

      opts.on('-r', '--release URL', 'Release URL, not tracking it any more') do |url|
        # Normalise exactly like --track, so that a URL tracked as
        # "example.com" can also be released as "example.com".
        options.url = prepare_url(url)
        options.action = :release
      end

      opts.on_tail('-h', '--help', 'Show this message') do
        puts opts
        exit(0)
      end
    end

    parser.parse!(argv)

    options
  end

  # Returns +url+ with "http://" in front of it, unless it already starts
  # with an http:// or https:// scheme. The argument itself is never
  # modified, so frozen strings are accepted.
  def prepare_url(url)
    url =~ URL_SCHEME ? url : "http://#{url}"
  end

end
|
108
|
+
|
109
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module UrlTracker
|
4
|
+
|
5
|
+
# Class representing a single web page to be tracked. It is capable of fetching
|
6
|
+
# the page's content and verifying if it was changed since last time it was fetched
|
7
|
+
class Page
  attr_reader :uri

  # Creates a new instance of UrlTracker::Page. The first argument is the URI that
  # corresponds to the page and will be lazily fetched. The second parameter
  # is an object that is responsible for fetching the page itself. It must respond
  # to the +get+ method with the given +uri+ parameter and return a string
  # with the page contents; this parameter defaults to +Net::HTTP+, so you should
  # by default pass +uri+ as an instance of +URI::Generic+.
  #
  # The page keeps its own copy (+dup+) of +uri+.
  def initialize(uri, page_fetcher = Net::HTTP)
    @uri = uri.dup
    @page_fetcher = page_fetcher
  end

  # Returns a string containing the page content. If not yet fetched, this method
  # will fetch the page for you.
  def content
    @content ||= fetch
  end

  # This method returns a string containing the page content, but always fetches
  # the page again.
  def content!
    @content = fetch
  end

  # Verifies if the page has changed since the last time it was fetched.
  # The page is always fetched again and the cached copy replaced. When
  # there was no cached copy to compare against, the answer is false.
  def changed?
    previous = @content
    @content = fetch
    previous ? @content != previous : false
  end

  # Two pages are considered the same if they have the same URI. Right, that
  # might not be true if the content is different (which shouldn't if you are
  # building a RESTful service), but we will just ignore that and pretend
  # we live in a better world.
  #
  # Objects that do not respond to +uri+ (nil, strings, ...) are never equal
  # to a page.
  def eql?(other)
    other.respond_to?(:uri) && @uri.eql?(other.uri)
  end

  # Same rule as +eql?+, but the URIs are compared with +==+.
  def ==(other)
    other.respond_to?(:uri) && @uri == other.uri
  end

  # Pages hash like their URI, so equal pages land on the same Hash key.
  def hash
    @uri.hash
  end

  private

  # Asks the fetcher for the current content of the page.
  def fetch
    @page_fetcher.get(@uri)
  end

end
|
68
|
+
|
69
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
module UrlTracker
|
2
|
+
|
3
|
+
require 'eventmachine'
|
4
|
+
|
5
|
+
# Small class wrapping EventMachine calls to programmatically
|
6
|
+
# execute code blocks.
|
7
|
+
class Periodic

  # Number of seconds in each supported time unit.
  # maybe consider :day in the future
  TIME_UNITS = {
    minute:  60,
    minutes: 60,
    hour:    60 * 60,
    hours:   60 * 60
  }.freeze

  # Creates a new instance of UrlTracker::Periodic and starts the event loop.
  def initialize
    @named_tasks = {}
    start_event_loop
  end

  # Register a new task to be executed in a specified amount of time.
  # Returns the interval in seconds. Raises a RuntimeError for an invalid
  # period or an unknown time unit.
  # Examples:
  #
  #   p = UrlTracker::Periodic.new
  #   p.every(:minute) { do_something }      #=> executed every minute
  #   p.every(2, :minutes) { do_something }  #=> executed every 2 minutes
  #   p.every(4, :hours) { do_something }    #=> executed every 4 hours
  def every(*args, &block)
    time = case args.first
           when Integer then args[0] * seconds_for(args[1])
           when Symbol  then seconds_for(args[0])
           else raise "Invalid period #{args[0].inspect}"
           end

    task = { every: time, task: block }
    task[:name] = @name if named_task?

    schedule_task(task)

    time
  ensure
    # The name given through #task applies to one #every call only; forget
    # it even when scheduling failed, so it cannot leak into the next task.
    @name = nil
  end

  # Returns the names of the named tasks registered. Names are kept as strings.
  # Example
  #
  #   p = UrlTracker::Periodic.new
  #   p.task(:foo).every(:minute) { do_something }
  #   p.task(:bar).every(2, :minutes) { do_other_thing }
  #   p.named_tasks #=> ["foo", "bar"]
  def named_tasks
    task_names
  end

  # Removes a task named +name+, so that it will no longer run. +name+ may be
  # given as a symbol or a string, just like in #task. Raises a RuntimeError
  # if no such task is registered.
  def remove_task(name)
    key = name.to_s # #task stores names as strings
    raise "Unregistered task #{name.inspect}" unless @named_tasks.key?(key)

    unschedule_task(key)
  end

  # Restarts the event loop
  def restart
    stop if running?
    start_event_loop
  end

  # Checks if the tasks are running, i.e. if the event loop thread is alive
  def running?
    @event_thread.alive?
  end

  # Stop all scheduled tasks by terminating the event loop thread
  def stop
    @event_thread.terminate
    @event_thread.join
  end

  # Used for creating named tasks or, in other words, tasks that can be removed
  # later using #remove_task. Returns self, so that #every can be chained.
  def task(name)
    @name = name.to_s
    self
  end

  private

  # True while a name given through #task is waiting for its #every call.
  def named_task?
    !@name.nil?
  end

  # Number of seconds in +time_unit+ (one of the TIME_UNITS keys).
  def seconds_for(time_unit)
    raise "Unknown time unit #{time_unit.inspect}" unless TIME_UNITS.key?(time_unit)
    TIME_UNITS[time_unit]
  end

  # +task+ is expected to be in the format:
  #
  #   { every: 60, task: #<Proc:0x...> }
  #
  # for a task to be run every minute, for example. An optional +:name+ entry
  # makes the timer removable through #remove_task.
  def schedule_task(t)
    periodic_timer = EM.add_periodic_timer(t[:every], &t[:task])
    @named_tasks[t[:name]] = periodic_timer if t.key?(:name)
  end

  def start_event_loop
    # start the event loop in a separate thread
    @event_thread = Thread.new { EM.run }

    # Wait until the reactor thread is sleeping (or dead), handing the CPU
    # to other threads between checks.
    Thread.pass until @event_thread.stop?
  end

  def task_names
    @named_tasks.keys
  end

  # Cancels the timer registered under +name+ and forgets it.
  def unschedule_task(name)
    EM.cancel_timer(@named_tasks.delete(name))
  end

end
|
128
|
+
|
129
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
module UrlTracker
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ostruct'
|
5
|
+
require 'logger'
|
6
|
+
require 'pathname'
|
7
|
+
|
8
|
+
# Class that waits for messages on a given socket and responds back to clients.
|
9
|
+
# It can track URLs, list currently tracked URLs and stop tracking.
|
10
|
+
# The interval between consecutive checks for change in the pages is customizable.
|
11
|
+
class Server
  include SocketCommunication

  # Socket file used when none is given.
  DEFAULT_SOCKET_FILE = '/tmp/_ut.sock'

  # Values accepted in URL_TRACKER_DEBUG and the Logger level each selects.
  LOG_LEVELS = {
    'debug'   => Logger::DEBUG,
    'error'   => Logger::ERROR,
    'fatal'   => Logger::FATAL,
    'info'    => Logger::INFO,
    'warning' => Logger::WARN
  }.freeze

  # Initializes a new server which logs its activities using the passed logger object.
  # Defaults to Ruby's Logger class. Log level defaults to +Logger::INFO+ and you can
  # override it by setting the URL_TRACKER_DEBUG environment variable. Also installs
  # handlers for the INT, TERM and QUIT signals.
  def initialize(logger=Logger.new(STDERR))
    setup_signals
    @logger = logger
    @logger.level = level_from_env || Logger::INFO
  end

  # Main server method. Loops waiting for new connections, reading one
  # message from each and taking the appropriate action according to what is
  # received, such as a new URL to track. The loop ends when a client asks
  # for a shutdown or when an error is raised (which is logged).
  def loop_forever
    while wait_for_connection
      @logger.info("New client connected")
      break if serve_current_client == :shutdown
    end
  rescue => e
    @logger.error("An error occurred when waiting for new connections!\n\t#{e.inspect}\n\t#{e.backtrace.join("\n\t")}")
  end

  # Track an URL
  def track(uri)
    @logger.info("Tracking URL #{uri}")
    UrlTracker.track_uri(uri)
  end

  # List tracked URLs
  def list
    UrlTracker.list_all
  end

  # Release an URL
  def release(uri)
    @logger.info("Releasing URL #{uri}")
    UrlTracker.release_uri(uri)
  end

  # Runs the server, according to the argv options passed.
  # Possible options:
  #
  #   -s, --socket [FILE]  #=> Uses FILE as socket file for communication
  #   -f, --fork           #=> Forks and works as a daemon
  #
  # +params+ can also be a hash, containing the parsed information to the server;
  # entries missing from the hash fall back to the same defaults as the
  # command line (DEFAULT_SOCKET_FILE, no fork).
  def run(params)
    @logger.info "UrlTracker #{UrlTracker::VERSION} starting. Log level is #{@logger.level.inspect}."

    options = parse(params)
    @pid = nil

    # bind stores the path in @socket_file
    bind(options.socket_file)

    @logger.info "Server starting at socket #{Pathname.new(@socket_file).realpath.to_s}"

    if options.fork
      @pid = fork { loop_forever }
      @logger.info "Forking to background. Child pid #{@pid}"
    else
      loop_forever
    end
  end

  # Stops the current server: signals the forked child when there is one,
  # otherwise closes the socket of this process.
  def stop
    @pid ? Process.kill('TERM', @pid) : close_connection
  end

  private

  # Reads one message from the client that has just connected, runs the
  # requested command and writes its response back (when there is one).
  # Returns :shutdown when the client asked the server to stop, nil otherwise.
  def serve_current_client
    command, *arguments = next_message.split
    @logger.debug "#{command} received"

    case command
    when /\Atrack\z/i    then reply(track(arguments.first))
    when /\Alist\z/i     then reply(list)
    when /\Arelease\z/i  then reply(release(arguments.first))
    when /\Ashutdown\z/i then request_shutdown
    end
  ensure
    # Only one message is read per connection, so hang up afterwards: this
    # releases the descriptor and unblocks a client still waiting for a reply.
    @current_client.close if @current_client && !@current_client.closed?
  end

  # Sends +response+ to the current client, unless there is nothing to send.
  def reply(response)
    write(response) unless response.nil?
    nil
  end

  # Honours the "shutdown" message sent by clients.
  def request_shutdown
    @logger.info("Shutdown requested by client")
    stop
    :shutdown
  end

  # Turns +argv+ into an OpenStruct responding to +socket_file+ and +fork+.
  # A hash is merged over the defaults; an array of command line arguments
  # is parsed (and consumed) by OptionParser.
  def parse(argv)
    defaults = { socket_file: DEFAULT_SOCKET_FILE, fork: false }
    return OpenStruct.new(defaults.merge(argv)) if argv.kind_of?(Hash)

    options = OpenStruct.new(defaults)

    parser = OptionParser.new do |opts|
      opts.banner = 'Usage: utd [options]'
      opts.separator ''
      opts.separator 'Available options:'

      opts.on('-s', '--socket FILE', 'Uses FILE as a socket. Defaults to /tmp/_ut.sock') do |socket_file|
        options.socket_file = socket_file
      end

      opts.on('-f', '--fork', 'Forks and works as a daemon') do |f|
        options.fork = f
      end

      opts.on_tail('-h', '--help', 'Show this message') do
        puts opts
        exit(0)
      end
    end

    parser.parse!(argv)

    options
  end

  # On INT, TERM or QUIT: stop the server, log a farewell and exit.
  def setup_signals
    %w[INT TERM QUIT].each do |signal|
      Signal.trap(signal) do
        stop
        # Logger takes a lock internally, which Ruby >= 2.0 refuses to do
        # inside a trap handler; write the message from a short-lived thread.
        Thread.new { @logger.info "SIG#{signal} received. Bye." }.join
        exit(0)
      end
    end
  end

  # Retrieves log level from the URL_TRACKER_DEBUG environment variable.
  # Returns nil when it is unset or holds an unrecognised value.
  def level_from_env
    LOG_LEVELS[ENV['URL_TRACKER_DEBUG']]
  end

end
|
149
|
+
|
150
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module UrlTracker
|
2
|
+
|
3
|
+
require 'socket'
|
4
|
+
|
5
|
+
# Implements communication via Unix sockets.
|
6
|
+
module SocketCommunication
|
7
|
+
attr_writer :path # path to the socket file
|
8
|
+
|
9
|
+
InvalidSocketError = Class.new(StandardError)
|
10
|
+
|
11
|
+
# Messages received cannot be longer than 1024 bytes
|
12
|
+
MAX_MESSAGE_LENGTH = 1024
|
13
|
+
|
14
|
+
# Max connections to be queued before accept
|
15
|
+
MAX_CONN_QUEUE = 10
|
16
|
+
|
17
|
+
# Connects to the Unix socket. Returns true if the connection was successful.
|
18
|
+
# Otherwise an exception is thrown. This method cannot be called if +bind+ was
|
19
|
+
# already called; neither can you call +bind+ if you call this method.
|
20
|
+
def connect(path)
|
21
|
+
raise_socket_error_if { defined? @socket }
|
22
|
+
@socket = Socket.new(:UNIX, :SOCK_STREAM, 0)
|
23
|
+
@socket.connect addrinfo_for(path)
|
24
|
+
true
|
25
|
+
end
|
26
|
+
|
27
|
+
# Binds the given path, creating a Unix socket. As with connect, you cannot use
|
28
|
+
# this method if already called +connect+ before. After this method is called,
|
29
|
+
# the socket will be waiting for connections.
|
30
|
+
def bind(path)
|
31
|
+
raise_socket_error_if { defined? @socket }
|
32
|
+
@socket_file = path
|
33
|
+
@socket = Socket.new(:UNIX, :SOCK_STREAM, 0)
|
34
|
+
@socket.bind addrinfo_for(@socket_file)
|
35
|
+
@socket.listen(MAX_CONN_QUEUE)
|
36
|
+
true
|
37
|
+
end
|
38
|
+
|
39
|
+
# Waits for a connection in the binded socket
|
40
|
+
def wait_for_connection
|
41
|
+
@current_client = @socket.accept.first
|
42
|
+
end
|
43
|
+
|
44
|
+
# Writes to the socket, returning the number of bytes sent. This method can
|
45
|
+
# only be called before +connect+ or +wait_for_connection+, otherwise you
|
46
|
+
# will get an exception
|
47
|
+
def write(message)
|
48
|
+
socket = (defined? @current_client) ? @current_client : @socket
|
49
|
+
socket.send(message, 0)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Waits for a message. Blocks until it is received.
|
53
|
+
def next_message
|
54
|
+
socket = (defined? @current_client) ? @current_client : @socket
|
55
|
+
socket.recvfrom(MAX_MESSAGE_LENGTH).first
|
56
|
+
end
|
57
|
+
|
58
|
+
def close_connection
|
59
|
+
defined?(@socket) && !@socket.closed? && @socket.close
|
60
|
+
File.unlink(@socket_file) if defined?(@socket_file) && File.exists?(@socket_file)
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
def raise_socket_error_if(&block)
|
66
|
+
raise InvalidSocketError if block.call
|
67
|
+
end
|
68
|
+
|
69
|
+
def addrinfo_for(path)
|
70
|
+
Addrinfo.unix(path)
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: url_tracker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.
|
4
|
+
version: '1.1'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -68,6 +68,12 @@ extensions: []
|
|
68
68
|
extra_rdoc_files: []
|
69
69
|
files:
|
70
70
|
- lib/url_tracker.rb
|
71
|
+
- lib/url_tracker/socket_communication.rb
|
72
|
+
- lib/url_tracker/server.rb
|
73
|
+
- lib/url_tracker/periodic.rb
|
74
|
+
- lib/url_tracker/client.rb
|
75
|
+
- lib/url_tracker/page.rb
|
76
|
+
- lib/url_tracker/version.rb
|
71
77
|
- bin/ut
|
72
78
|
- bin/utd
|
73
79
|
- test/test_server.rb
|
@@ -91,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
97
|
version: '0'
|
92
98
|
segments:
|
93
99
|
- 0
|
94
|
-
hash: -
|
100
|
+
hash: -1434435919243475892
|
95
101
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
102
|
none: false
|
97
103
|
requirements:
|
@@ -100,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
106
|
version: '0'
|
101
107
|
segments:
|
102
108
|
- 0
|
103
|
-
hash: -
|
109
|
+
hash: -1434435919243475892
|
104
110
|
requirements: []
|
105
111
|
rubyforge_project:
|
106
112
|
rubygems_version: 1.8.23
|