crocoduck 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
+ # The Entry object represents a document retrieved from
2
+ # the datastore. By default this is a MongoDB document.
3
+ require 'crocoduck/job'
4
+ require 'crocoduck/redis'
5
+ require 'crocoduck/resque'
6
+ require 'crocoduck/store'
7
+
8
module Crocoduck
  # An Entry wraps a single document from the datastore, identified by
  # ``entry_id``. Both the document and the Store used to reach it are
  # created lazily on first use and cached on the instance.
  class Entry
    # ``entry=`` and ``store=`` allow a pre-fetched document or a custom
    # store to be injected. The matching readers are private (see below).
    attr_accessor :entry_id
    attr_writer :entry, :store

    def initialize(entry_id)
      @entry_id = entry_id
    end

    # A quick way to start work on an Entry is to do something
    # like the following
    #
    #   >>> Entry.new(53029).schedule(ShortUrlJob)
    #
    # Enqueues ``worker`` (``Job`` by default) on Resque with this entry's
    # id as its only argument and returns the result of ``Resque.enqueue``.
    def schedule(worker = Job)
      Resque.enqueue worker, entry_id
    end

    # Rather than access the underlying document directly, one can do the
    # following:
    #
    #   :001 > e = Crocoduck::Entry.new(50039)
    #    => #<Crocoduck::Entry:0x101611938 @entry_id=50039>
    #   :002 > e["url"]
    #    => "/apple/news/2011/04/this-is-not-a-real-article.ars"
    #
    # Returns nil when the key is absent, and also when the document itself
    # could not be found in the store.
    def [](key)
      document = entry
      # A missing document reads as "no value" rather than raising
      # NoMethodError on nil.
      return nil if document.nil?
      document.has_key?(key) ? document[key] : nil
    end

    # This hasn't been field tested yet, but ``update`` should be a
    # convenience method to manipulate a field on the entry document
    # stored here. If a job needs to store results or data on a
    # different document, it can call ``Crocoduck::Store#update``
    # directly.
    def update(field, value)
      store.update entry_id, field, value
    end

    # Call this method on your entries to have them close their own
    # store object. Does nothing if no store was ever created, so closing
    # an untouched entry never instantiates a Store just to close it.
    def close
      @store.close if @store
    end

    # True only when the store reports it is configured and a document
    # could actually be fetched for ``entry_id``.
    def setup?
      store.setup? && !entry.nil?
    end

    private

    # When the ``entry`` property of an Entry object is accessed
    # we attempt to retrieve the document from the store, save it
    # on our object, and then return it. Further accesses get the
    # cached copy of the document. (A nil result is not cached, so a
    # missing document is looked up again on the next access.)
    def entry
      @entry ||= store.get entry_id
    end

    # Lazily builds and caches the Store this entry talks to.
    def store
      @store ||= Store.new
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # `Job` is a class that is intended to be extended to do meaningful work. A
2
+ # Crocoduck Job is simply a Resque style job that knows about its own
3
+ # datastore and an entry object (Mongo Document when using the supplied
4
+ # ``store`` class).
5
+ require 'crocoduck/logging'
6
+ require 'crocoduck/entry'
7
+
8
module Crocoduck
  # Base class for Resque-style jobs that operate on a single Entry.
  # Subclasses normally override ``do_work`` and, optionally, the
  # ``setup`` / ``finished`` / ``cleanup`` / ``handle_exception`` hooks.
  class Job
    # Override the value of ``@queue`` to specify which resque workers will
    # process this job.
    @queue = :low

    class << self
      attr_accessor :description

      # ``@queue`` is a class-level instance variable and is therefore NOT
      # inherited by subclasses. This reader falls back to the superclass
      # so a subclass that does not set its own ``@queue`` still resolves
      # to a queue instead of having none.
      def queue
        return @queue if @queue
        superclass.queue if superclass.respond_to?(:queue)
      end
    end

    # ``perform`` is the method called by Resque. A Crocoduck job only expects
    # an ``entry_id`` corresponding to a record in your Mongo store. An
    # ``Entry`` is instantiated with said ``entry_id`` and passed to a new
    # instance of this job and run is called on it.
    def self.perform(entry_id)
      init_with_id(entry_id).run
    end

    # A convenience initializer that returns a job instance (of whichever
    # class it is called on) with its entry object ready to go.
    def self.init_with_id(entry_id)
      new(Entry.new(entry_id))
    end

    include Logging

    attr_accessor :entry

    def initialize(entry)
      @entry = entry
    end

    # The ``do_work`` method should be overridden to do some kind of work on
    # the stored entry object. The default implementation only logs the
    # entry's "url" field.
    def do_work
      logger.info "Starting work"
      # Do Something with entry
      # entry.update "derp", "herp"
      logger.info entry["url"]
      # shorturl = shorturl.generate entry["url"]
      # entry.update 'shorturl', shorturl
      # entry.update 'shorturl_status', job_status
      logger.info "Ending work"
    end

    # If your job failed, you can do something interesting here. Generally
    # you will want to ultimately raise the exception so Resque can track it.
    def handle_exception(e)
      raise e
    end

    # This method will be called immediately before sanity checks and before
    # ``do_work`` is called.
    def setup
      logger.info "Job is setup"
    end

    # This method will be called once ``do_work`` has finished successfully.
    # Do anything you'd need to do once the processing was finished
    # properly (save out your entry, update stats, et cetera).
    def finished
      logger.info "Job finished successfully"
    end

    # This method will always be called, regardless of the failure or
    # success of your job.
    def cleanup
      entry.close
      logger.info "Job cleaned up"
    end

    # The ``run`` method is a thin wrapper around ``do_work`` which lets us
    # do some setup, benchmark the work we'll do, cleanly handle exceptions if
    # thrown by the ``do_work`` call, and clean up our store and entry
    # afterwards.
    def run
      setup
      # The job will not process anything unless our datastore has enough
      # information to connect and a valid entry object could be fetched
      # from the store. The early return skips ``finished`` (the ``else``
      # clause) but ``cleanup`` still runs via ``ensure``.
      unless entry.setup?
        logger.warn "Skipping job: store is not configured or entry could not be fetched"
        return
      end
      benchmark :info, "Running job" do
        do_work
      end
    # Exception handling is parceled out to ``Job`` methods you can override
    # to handle cleanup specific to your task. ``Exception`` (not just
    # ``StandardError``) is rescued on purpose so the hook sees every
    # failure; an overriding ``handle_exception`` should re-raise anything
    # it does not explicitly mean to swallow.
    rescue Exception => e
      handle_exception e
    else
      finished
    ensure
      cleanup
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # Include Logging into your class to get a logger and benchmark
2
+ # object for logging errors or information to stderr and for profiling
3
+ # interesting bits of code.
4
+ require 'benchmark'
5
+ require 'logger'
6
+
7
module Crocoduck
  class << self
    # Lets an application swap in its own logger (different destination,
    # level or formatter) instead of the default stderr logger.
    attr_writer :logger
  end

  # The logger shared by everything that includes ``Crocoduck::Logging``.
  # Defaults to a ``Logger`` writing to ``$stderr``, created on first use.
  def self.logger
    @logger ||= Logger.new($stderr)
  end

  # Mixin giving the including class private ``logger`` and ``benchmark``
  # helpers.
  module Logging
    private

    # The shared Crocoduck logger.
    def logger
      Crocoduck.logger
    end

    # Runs the given block, then logs ``message`` followed by the elapsed
    # wall-clock time in seconds at the given ``level`` (a logger method
    # name such as ``:info``). Returns the block's result. If the block
    # raises, nothing is logged and the exception propagates.
    def benchmark(level, message)
      result = nil
      # Benchmark.realtime reports seconds, matching the "s" suffix below.
      seconds = Benchmark.realtime { result = yield }
      logger.send(level, '%s (%.5fs)' % [message, seconds])
      result
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'redis'
2
+ require 'uri'
3
+
4
module Crocoduck
  # Shared Redis client for the whole gem. The location is parsed with URI
  # so host, port and (optional) password are picked out of a single string.
  redis_location = URI.parse("http://localhost:6379")

  Redis = ::Redis.new(
    :host     => redis_location.host,
    :port     => redis_location.port,
    :password => redis_location.password
  )
end
@@ -0,0 +1,8 @@
1
+ require 'crocoduck/redis'
2
+ require 'resque'
3
+ require 'resque/server'
4
+
5
module Crocoduck
  # Point the global Resque at Crocoduck's Redis client, and expose Resque
  # under the Crocoduck namespace so the rest of the gem can refer to it
  # without the leading ``::``.
  ::Resque.redis = Crocoduck::Redis
  Resque = ::Resque
end
@@ -0,0 +1,19 @@
1
+ require 'sinatra/base'
2
+ require 'crocoduck/entry'
3
+
4
module Crocoduck
  # Minimal Sinatra front end: a form at "/" that schedules a job for the
  # submitted entry id.
  class Server < Sinatra::Base
    set :root, File.dirname(__FILE__)

    # Renders the ``index`` ERB template.
    get "/" do
      erb :index
    end

    # Schedules the default job for the posted ``entry_id`` and redirects
    # back to the form. A missing or blank ``entry_id`` is ignored so that
    # an empty form submission does not enqueue a job with no target.
    post "/" do
      entry_id = params[:entry_id]
      Entry.new(entry_id).schedule unless entry_id.to_s.strip.empty?
      redirect "/"
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # The Crocoduck::Store object handles the concern of talking to your
2
+ # data storage layer. By default, we have implemented this on top
3
+ # of MongoDB, so it may be that many of the choices made here highly
4
+ # favor document-based databases.
5
+ require 'mongo'
6
+ require 'crocoduck/logging'
7
+
8
module Crocoduck
  # Thin wrapper around one collection of the backing document database.
  # Connection, database and collection handles are created lazily and
  # cached per instance.
  class Store
    include Logging

    # We have several class properties that define how all Store
    # objects will connect and query for information. As stated
    # before, many of these will only make sense for MongoDB or
    # other similar document-based databases.
    @id_field = '_id'
    @server_cluster = nil
    @server_db = nil
    @server_collection = nil

    class << self
      attr_accessor :id_field, :server_cluster, :server_db, :server_collection
    end

    # Writers allow pre-built handles to be injected; the matching readers
    # are the private lazy builders at the bottom of the class.
    attr_writer :store, :database, :collection

    # A nice method to determine if there is enough information
    # to potentially connect to the backing database. Always returns
    # true or false.
    def setup?
      config = Crocoduck::Store
      !!(config.server_cluster && config.server_db && config.server_collection)
    end

    # Closes the underlying connection if one was opened. A store that
    # never connected is left alone, so closing it does not open a
    # connection (which could fail when the store is unconfigured).
    def close
      @store.close if @store
    end

    # A simple convenience method to update a single
    # document in your datastore: sets ``field`` to ``value`` on the
    # document whose id field equals ``entry_id``.
    #
    # NOTE(review): ``get`` coerces the id with ``to_i`` but this method
    # does not, so a string id can fetch a document yet fail to update
    # it -- confirm which behaviour callers rely on before unifying.
    def update(entry_id, field, value)
      selector = { Crocoduck::Store.id_field => entry_id }
      change   = { '$set' => { field => value } }
      # :safe => true presumably asks the driver to confirm the write --
      # verify against the mongo driver documentation.
      collection.update(selector, change, :safe => true)
    end

    # Returns a single document given its ID (coerced with ``to_i``).
    def get(id)
      collection.find_one(Crocoduck::Store.id_field => id.to_i)
    end

    # Use this method to remove documents from your datastore. Care
    # has been taken to prevent accidental database destruction: calling
    # it with no argument (or nil) does nothing. Only pass {} to this
    # method if you are 100% sure you want to clear the collection.
    def remove(criteria = nil)
      return if criteria.nil?
      collection.remove criteria
    end

    # Inserts a brand new document into the database
    def insert(document)
      collection.insert document
    end

    private

    # These methods create and cache objects that maintain the state and
    # connectivity to the backend storage.
    def collection
      @collection ||= database.collection(Crocoduck::Store.server_collection)
    end

    def database
      @database ||= store.db(Crocoduck::Store.server_db)
    end

    def store
      @store ||= Mongo::ReplSetConnection.new(*Crocoduck::Store.server_cluster)
    end
  end
end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crocoduck
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 5
10
+ version: 0.0.5
11
+ platform: ruby
12
+ authors:
13
+ - Clint Ecker
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-06-17 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: redis
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: resque
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: sinatra
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ type: :runtime
62
+ version_requirements: *id003
63
+ - !ruby/object:Gem::Dependency
64
+ name: mongo
65
+ prerelease: false
66
+ requirement: &id004 !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ type: :runtime
76
+ version_requirements: *id004
77
+ - !ruby/object:Gem::Dependency
78
+ name: rdiscount
79
+ prerelease: false
80
+ requirement: &id005 !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ hash: 3
86
+ segments:
87
+ - 0
88
+ version: "0"
89
+ type: :development
90
+ version_requirements: *id005
91
+ description: " Crocoduck is a Resque job system that seeks to model the pattern of mutating MongoDB documents.\n"
92
+ email: me@clintecker.com
93
+ executables: []
94
+
95
+ extensions: []
96
+
97
+ extra_rdoc_files: []
98
+
99
+ files:
100
+ - lib/crocoduck/entry.rb
101
+ - lib/crocoduck/job.rb
102
+ - lib/crocoduck/logging.rb
103
+ - lib/crocoduck/redis.rb
104
+ - lib/crocoduck/resque.rb
105
+ - lib/crocoduck/server.rb
106
+ - lib/crocoduck/store.rb
107
+ has_rdoc: true
108
+ homepage: https://github.com/clintecker/crocoduck
109
+ licenses: []
110
+
111
+ post_install_message:
112
+ rdoc_options: []
113
+
114
+ require_paths:
115
+ - lib
116
+ required_ruby_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ hash: 3
122
+ segments:
123
+ - 0
124
+ version: "0"
125
+ required_rubygems_version: !ruby/object:Gem::Requirement
126
+ none: false
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ hash: 3
131
+ segments:
132
+ - 0
133
+ version: "0"
134
+ requirements: []
135
+
136
+ rubyforge_project:
137
+ rubygems_version: 1.4.1
138
+ signing_key:
139
+ specification_version: 3
140
+ summary: Resque Jobs working on MongoDB documents
141
+ test_files: []
142
+