url_tracker 1.0 → 1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,109 @@
1
module UrlTracker

  # Client side of the tracker. It connects to the server's Unix socket and
  # sends plain-text requests: track a new URL, list all currently tracked
  # URLs, stop tracking a URL or shut the server down.
  class Client
    include SocketCommunication

    require 'optparse'
    require 'ostruct'

    # Connects to the server listening on +socket_file+.
    #
    # If the socket file does not exist, or exists but nobody is listening on
    # it (stale file left by a dead server), a message is printed to STDERR
    # and the process exits with status 1.
    def initialize(socket_file = '/tmp/_ut.sock')
      connect(socket_file)
    rescue Errno::ENOENT, Errno::ECONNREFUSED
      STDERR.puts 'Connection error. Is the server running?'
      exit(1)
    end

    # Sends a message to the server asking to track a new URL. Format
    # of the message:
    #
    #   "track {{URL}}"
    #
    # Returns true if the server answered "ok".
    def track(url)
      write("track #{url}")
      next_message == 'ok'
    end

    # Asks the server for all the URLs currently being tracked. Expects a
    # string back, with URLs separated by commas, and returns them as an array.
    def list
      write('list')
      next_message.split(',')
    end

    # Tells the server to stop tracking the given URL. Returns true if the
    # server answered "ok".
    def release(url)
      write("release #{url}")
      next_message == 'ok'
    end

    # Tells the server to shutdown. No answer is read back.
    def shutdown
      write('shutdown')
    end

    # Calls one of the methods above according to the options passed and
    # returns its result (nil when no action was requested).
    # Available options:
    #
    #   -t, --track URL    #=> Starts tracking URL
    #   -l, --list         #=> List currently tracked URLs
    #   -r, --release URL  #=> Releases URL, not tracking it any more
    #
    # +params+ can also be a hash (keys +action+ and +url+), in which case it
    # will be considered already parsed and is used as given.
    def run(params)
      options = parse(params)

      case options.action
      when :track   then track(options.url)
      when :list    then list
      when :release then release(options.url)
      end
    end


    private

    # Turns +argv+ into an OpenStruct responding to +action+ and +url+.
    # A Hash is wrapped untouched; an argument array is parsed (and consumed)
    # by OptionParser. When no option is given, +action+ is :nothing.
    def parse(argv)
      return OpenStruct.new(argv) if argv.kind_of?(Hash)

      options = OpenStruct.new
      options.action = :nothing

      parser = OptionParser.new do |opts|
        opts.banner = 'Usage: ut [options]'
        opts.separator ''
        opts.separator 'Available options:'

        opts.on('-t', '--track URL', 'Start tracking URL') do |url|
          options.url    = prepare_url(url)
          options.action = :track
        end

        opts.on('-l', '--list', 'List currently tracked URLs') do
          options.action = :list
        end

        opts.on('-r', '--release URL', 'Release URL, not tracking it any more') do |url|
          # Normalise exactly like --track, otherwise a URL tracked as
          # "example.com" could never be released with the same argument.
          options.url    = prepare_url(url)
          options.action = :release
        end

        opts.on_tail('-h', '--help', 'Show this message') do
          puts opts
          exit(0)
        end
      end

      parser.parse!(argv)

      options
    end

    # Returns +url+ with an explicit scheme. URLs that already start with
    # http:// or https:// are returned as they are; anything else gets
    # "http://" prepended. The argument itself is never modified.
    def prepare_url(url)
      url =~ %r{\Ahttps?://}i ? url : "http://#{url}"
    end

  end

end
@@ -0,0 +1,69 @@
1
+ require 'net/http'
2
+
3
module UrlTracker

  # Class representing a single web page to be tracked. It is capable of fetching
  # the page's content and verifying if it was changed since last time it was fetched
  class Page
    attr_reader :uri

    # Creates a new instance of UrlTracker::Page. The first argument is the URI that
    # corresponds to the page and will be lazily fetched (a copy of it is kept).
    # The second parameter is an object that is responsible for fetching the page
    # itself. It must respond to the +get+ method with the given +uri+ parameter
    # and return a string with the page contents; this parameter defaults to
    # +Net::HTTP+, so you should by default pass +uri+ as an instance of +URI::Generic+
    def initialize(uri, page_fetcher = Net::HTTP)
      @uri          = uri.dup
      @page_fetcher = page_fetcher
    end

    # Returns a string containing the page content. If not yet fetched, this method
    # will fetch the page for you; later calls reuse the cached copy.
    def content
      @content ||= fetch
    end

    # This method returns a string containing the page content, but always fetches
    # the page again and refreshes the cached copy.
    def content!
      @content = fetch
    end

    # Verifies if a page has changed since the last time it was fetched.
    # The page is always fetched again by this method. When there was no
    # cached copy to compare against, the page is reported as unchanged.
    def changed?
      if @content # we have a cached copy
        old_content = @content
        @content = fetch
        @content != old_content
      else
        @content = fetch
        false
      end
    end

    # Two pages are considered the same if they have the same URI. Right, that
    # might not be true if the content is different (which shouldn't if you are
    # building a RESTful service), but we will just ignore that and pretend
    # we live in a better world.
    #
    # Objects that do not expose a +uri+ are never equal to a page.
    def eql?(other)
      other.respond_to?(:uri) && @uri.eql?(other.uri)
    end

    # Same rule as #eql?, using the URI's own +==+.
    def ==(other)
      other.respond_to?(:uri) && @uri == other.uri
    end

    # Hash of the URI, so that equal pages land in the same Hash/Set slot.
    def hash
      @uri.hash
    end

    private

    # Asks the fetcher for the current content of the page.
    def fetch
      @page_fetcher.get(@uri)
    end

  end

end
@@ -0,0 +1,129 @@
1
module UrlTracker

  require 'eventmachine'

  # Small class wrapping EventMachine calls to programmatically
  # execute code blocks at regular intervals.
  class Periodic

    # Seconds per supported time unit.
    # maybe consider :day in the future
    TIME_UNITS = {
      minute:  60,
      minutes: 60,
      hour:    60*60,
      hours:   60*60
    }.freeze

    # Creates a new instance of UrlTracker::Periodic and starts the event loop.
    def initialize
      @named_tasks = {}
      start_event_loop
    end

    # Register a new task to be executed in a specified amount of time.
    # Returns the period in seconds. Raises RuntimeError for an unknown
    # time unit or an invalid period.
    # Examples:
    #
    #   p = UrlTracker::Periodic.new
    #   p.every(:minute)     { do_something } #=> executed every minute
    #   p.every(2, :minutes) { do_something } #=> executed every 2 minutes
    #   p.every(4, :hours)   { do_something } #=> executed every 4 hours
    def every(*args, &block)
      time = case args.first
             when Integer then args[0] * seconds_for(args[1])
             when Symbol  then seconds_for(args[0])
             else raise "Invalid period #{args[0].inspect}"
             end

      schedule_task(every: time, task: block)

      time
    ensure
      # The pending name set by #task applies to a single #every call only,
      # even when that call fails; otherwise it would leak to the next task.
      @name = nil
    end

    # Returns the names of the named tasks registered. Names are stored as
    # strings.
    # Example
    #
    #   p = UrlTracker::Periodic.new
    #   p.task(:foo).every(:minute) { do_something }
    #   p.task(:bar).every(2, :minutes) { do_other_thing }
    #   p.named_tasks #=> ["foo", "bar"]
    def named_tasks
      task_names
    end

    # Removes a task named +name+, so that it will no longer run. +name+ may
    # be given as a string or a symbol, just like in #task. Raises
    # RuntimeError if no such task is registered.
    def remove_task(name)
      key = name.to_s # names are stored as strings by #task
      raise "Unregistered task #{name.inspect}" unless @named_tasks.include?(key)

      unschedule_task(key)
    end

    # Restarts the event loop
    def restart
      stop if running?
      start_event_loop
    end

    # Checks if the thread running the event loop is alive
    def running?
      @event_thread.alive?
    end

    # Stop all scheduled tasks by terminating the event loop thread
    def stop
      @event_thread.terminate
      @event_thread.join
    end

    # Used for creating named tasks or, in other words, tasks that can be removed
    # later using #remove_task. Returns self so that #every can be chained.
    def task(name)
      @name = name.to_s
      self
    end

    private

    # True while a name set by #task is waiting for its #every call.
    def named_task?
      !@name.nil?
    end

    # Number of seconds in +time_unit+ (one of the TIME_UNITS keys).
    def seconds_for(time_unit)
      raise "Unkown time unit #{time_unit.inspect}" unless TIME_UNITS.include?(time_unit)
      TIME_UNITS[time_unit]
    end

    # +task+ is expected to be in the format:
    #
    #   { every: 60, task: #<Proc:0x...> }
    #
    # for a task to be run every minute, for example. When a name is pending
    # (see #task) the created timer is remembered under that name.
    def schedule_task(t)
      periodic_timer = EM.add_periodic_timer(t[:every], &t[:task])
      @named_tasks[@name] = periodic_timer if named_task?
    end

    def start_event_loop
      # start the event loop in a separate thread
      @event_thread = Thread.new { EM.run }

      # Wait until the reactor thread is sleeping (or dead), yielding the
      # CPU to it instead of spinning in a hot loop.
      Thread.pass until @event_thread.stop?
    end

    def task_names
      @named_tasks.keys
    end

    # Cancels the timer registered under +name+ and forgets it.
    def unschedule_task(name)
      EM.cancel_timer(@named_tasks.delete(name))
    end

  end

end
@@ -0,0 +1,150 @@
1
module UrlTracker

  require 'optparse'
  require 'ostruct'
  require 'logger'
  require 'pathname'

  # Class that waits for messages on a given socket and responds back to clients.
  # It can track URLs, list currently tracked URLs and stop tracking.
  class Server
    include SocketCommunication

    # Initializes a new server which logs its activities using the passed logger object.
    # Defaults to Ruby's Logger class writing to STDERR. Log level defaults to
    # +Logger::INFO+ and you can override it by setting the URL_TRACKER_DEBUG
    # environment variable (debug, info, warn/warning, error or fatal).
    def initialize(logger=Logger.new(STDERR))
      setup_signals
      @logger = logger
      @logger.level = level_from_env || Logger::INFO
    end

    # Main server method. Loops waiting for new connections, reading one
    # message per connection and taking the appropriate action according to
    # what is received: "track URL", "list", "release URL" or "shutdown".
    # Unknown commands are ignored. The client socket is always closed once
    # its request has been handled. Any error is logged and ends the loop.
    def loop_forever
      shutdown_requested = false

      until shutdown_requested
        client = wait_for_connection
        break unless client

        begin
          @logger.info("New client connected")
          command, *arguments = next_message.split
          @logger.debug "#{command} received"

          response = case command
                     when /^track$/i   then track(arguments.first)
                     when /^list$/i    then list
                     when /^release$/i then release(arguments.first)
                     when /^shutdown$/i
                       shutdown_requested = true
                       nil # the client does not wait for an answer
                     end

          write(response) unless response.nil?
        ensure
          # One request per connection: release the descriptor right away
          # instead of leaking one socket per client.
          client.close unless client.closed?
        end
      end

      if shutdown_requested
        @logger.info("Shutdown requested by client. Bye.")
        close_connection
      end
    rescue => e
      @logger.error("An error occurred when waiting for new connections!\n\t#{e.inspect}\n\t#{e.backtrace.join("\n\t")}")
    end

    # Track an URL
    def track(uri)
      @logger.info("Tracking URL #{uri}")
      UrlTracker.track_uri(uri)
    end

    # List tracked URLs
    def list
      UrlTracker.list_all
    end

    # Release an URL
    def release(uri)
      @logger.info("Releasing URL #{uri}")
      UrlTracker.release_uri(uri)
    end

    # Runs the server, according to the argv options passed.
    # Possible options:
    #
    #   -s, --socket FILE  #=> Uses FILE as socket file for communication
    #   -f, --fork         #=> Forks and works as a daemon
    #
    # +params+ can also be a hash (keys +socket_file+ and +fork+), containing
    # the parsed information to the server.
    def run(params)
      @logger.info "UrlTracker #{UrlTracker::VERSION} starting. Log level is #{@logger.level.inspect}."

      options = parse(params)
      @socket_file = options.socket_file
      @pid = nil

      bind(@socket_file)

      @logger.info "Server starting at socket #{Pathname.new(@socket_file).realpath.to_s}"

      if options.fork
        @pid = fork { loop_forever }
        @logger.info "Forking to background. Child pid #{@pid}"
      else
        loop_forever
      end
    end

    # Stops the current server: sends TERM to the forked child when there is
    # one, otherwise closes the socket of this process.
    def stop
      @pid ? Process.kill('TERM', @pid) : close_connection
    end

    private

    # Turns +argv+ into an OpenStruct responding to +socket_file+ and +fork+.
    # A Hash is wrapped untouched; an argument array is parsed (and consumed)
    # by OptionParser.
    def parse(argv)
      return OpenStruct.new(argv) if argv.kind_of?(Hash)

      options = OpenStruct.new
      options.socket_file = '/tmp/_ut.sock'
      options.fork = false

      parser = OptionParser.new do |opts|
        opts.banner = 'Usage: utd [options]'
        opts.separator ''
        opts.separator 'Available options:'

        opts.on('-s', '--socket FILE', 'Uses FILE as a socket. Defaults to /tmp/_ut.sock') do |socket_file|
          options.socket_file = socket_file
        end

        opts.on('-f', '--fork', 'Forks and works as a daemon') do |f|
          options.fork = f
        end

        opts.on_tail('-h', '--help', 'Show this message') do
          puts opts
          exit(0)
        end
      end

      parser.parse!(argv)

      options
    end

    # Stops the server and exits with status 0 on INT, TERM or QUIT.
    # NOTE(review): some Ruby versions refuse to take locks inside a trap
    # handler, which may prevent the log line below from being written --
    # confirm on the targeted runtime.
    def setup_signals
      ['INT', 'TERM', 'QUIT'].each do |signal|
        Signal.trap(signal) do
          stop
          @logger.info "SIG#{signal} received. Bye."
          exit(0)
        end
      end
    end

    # Retrieves log level from the URL_TRACKER_DEBUG environment variable.
    # The value is case-insensitive; returns nil when it is unset or unknown.
    def level_from_env
      case ENV['URL_TRACKER_DEBUG'].to_s.downcase
      when 'debug'           then Logger::DEBUG
      when 'error'           then Logger::ERROR
      when 'fatal'           then Logger::FATAL
      when 'info'            then Logger::INFO
      when 'warning', 'warn' then Logger::WARN
      else nil
      end
    end

  end

end
@@ -0,0 +1,75 @@
1
module UrlTracker

  require 'socket'

  # Implements communication via Unix sockets. An including object is either
  # a client (after +connect+) or a server (after +bind+), never both.
  module SocketCommunication
    attr_writer :path # path to the socket file

    InvalidSocketError = Class.new(StandardError)

    # Messages received cannot be longer than 1024 bytes
    MAX_MESSAGE_LENGTH = 1024

    # Max connections to be queued before accept
    MAX_CONN_QUEUE = 10

    # Connects to the Unix socket. Returns true if the connection was successful.
    # Otherwise an exception is thrown (the system error raised by the failed
    # connection) and the half-open socket is closed, so +connect+ may be
    # retried. This method cannot be called if +bind+ was already called;
    # neither can you call +bind+ if you call this method: InvalidSocketError
    # is raised in both cases.
    def connect(path)
      raise_socket_error_if { defined? @socket }
      @socket = open_socket { |socket| socket.connect addrinfo_for(path) }
      true
    end

    # Binds the given path, creating a Unix socket. As with connect, you cannot use
    # this method if already called +connect+ before. After this method is called,
    # the socket will be waiting for connections. Returns true on success; on
    # failure the system error is raised and the socket is closed.
    def bind(path)
      raise_socket_error_if { defined? @socket }
      @socket = open_socket do |socket|
        socket.bind addrinfo_for(path)
        socket.listen(MAX_CONN_QUEUE)
      end
      @socket_file = path
      true
    end

    # Waits for a connection in the bound socket and returns the socket of
    # the accepted client, which becomes the peer for +write+/+next_message+.
    def wait_for_connection
      @current_client = @socket.accept.first
    end

    # Writes to the socket, returning the number of bytes sent. This method can
    # only be called after +connect+ or +wait_for_connection+, otherwise you
    # will get an exception
    def write(message)
      socket = (defined? @current_client) ? @current_client : @socket
      socket.send(message, 0)
    end

    # Waits for a message. Blocks until it is received. At most
    # MAX_MESSAGE_LENGTH bytes are read.
    def next_message
      socket = (defined? @current_client) ? @current_client : @socket
      socket.recvfrom(MAX_MESSAGE_LENGTH).first
    end

    # Closes every socket still open and removes the socket file created by
    # +bind+, if any.
    def close_connection
      @current_client.close if defined?(@current_client) && !@current_client.closed?
      @socket.close if defined?(@socket) && !@socket.closed?
      File.unlink(@socket_file) if defined?(@socket_file) && File.exist?(@socket_file)
    end

    private

    # Raises InvalidSocketError when the given block evaluates to true.
    def raise_socket_error_if(&block)
      raise InvalidSocketError if block.call
    end

    def addrinfo_for(path)
      Addrinfo.unix(path)
    end

    # Creates a Unix stream socket, yields it for setup and returns it.
    # If the setup raises, the socket is closed before the error propagates.
    def open_socket
      socket = Socket.new(:UNIX, :SOCK_STREAM, 0)
      begin
        yield socket
      rescue StandardError
        socket.close
        raise
      end
      socket
    end

  end

end
@@ -0,0 +1,5 @@
1
module UrlTracker

  # Version of the gem. Frozen so that it cannot be modified in place.
  VERSION = '1.1'.freeze

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: url_tracker
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: '1.1'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -68,6 +68,12 @@ extensions: []
68
68
  extra_rdoc_files: []
69
69
  files:
70
70
  - lib/url_tracker.rb
71
+ - lib/url_tracker/socket_communication.rb
72
+ - lib/url_tracker/server.rb
73
+ - lib/url_tracker/periodic.rb
74
+ - lib/url_tracker/client.rb
75
+ - lib/url_tracker/page.rb
76
+ - lib/url_tracker/version.rb
71
77
  - bin/ut
72
78
  - bin/utd
73
79
  - test/test_server.rb
@@ -91,7 +97,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
91
97
  version: '0'
92
98
  segments:
93
99
  - 0
94
- hash: -1259474569283625211
100
+ hash: -1434435919243475892
95
101
  required_rubygems_version: !ruby/object:Gem::Requirement
96
102
  none: false
97
103
  requirements:
@@ -100,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
106
  version: '0'
101
107
  segments:
102
108
  - 0
103
- hash: -1259474569283625211
109
+ hash: -1434435919243475892
104
110
  requirements: []
105
111
  rubyforge_project:
106
112
  rubygems_version: 1.8.23