postqueue 0.2.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,23 @@
1
+ module Tracker
2
+ module Migration
3
+ class Options < OpenStruct
4
+ def render(erb)
5
+ erb.result(binding)
6
+ end
7
+ end
8
+
9
+ def migrate!
10
+ template = File.read(File.dirname(__FILE__) + "/tracker.sql")
11
+ renderer = ERB.new(template)
12
+
13
+ options = Options.new(reinstall: false)
14
+ sql = options.render(renderer)
15
+ ActiveRecord::Base.connection.execute sql
16
+ end
17
+
18
+ def track_table!(table)
19
+ sql = "SELECT tracker.track_table('#{table}', 'id', array['created_at','updated_at', 'tsv', 'pg_search'])"
20
+ ActiveRecord::Base.connection.execute sql
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,45 @@
1
+ module Tracker
2
+ module Registry
3
+ extend self
4
+
5
+ def reset!
6
+ @registrations = nil
7
+ end
8
+
9
+ private
10
+
11
+ def registrations
12
+ @registrations ||= Hash.new { |h, k| h[k] = [] }
13
+ end
14
+
15
+ def register_callback(event, &proc)
16
+ STDERR.puts "Starting to track #{event.inspect}"
17
+ callbacks(event) << proc
18
+ end
19
+
20
+ def callbacks(event)
21
+ registrations[event]
22
+ end
23
+
24
+ public
25
+
26
+ def on(event, &proc)
27
+ expect! event => /(insert|delete|update)$/
28
+ register_callback event, &proc
29
+ end
30
+
31
+ def track(table, &proc)
32
+ register_callback table, &proc
33
+ end
34
+
35
+ def tracks?(table)
36
+ !callbacks(table).empty?
37
+ end
38
+
39
+ def publish!(event_name, *args)
40
+ callbacks(event_name).each do |callback|
41
+ callback.call(*args)
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,231 @@
1
+ -- A tracker history is important on most tables. Provide a tracker trigger that logs to
2
+ -- a dedicated tracker table for the major relations.
3
+ --
4
+ -- This file should be generic and not depend on application roles or structures,
5
+ -- as it's being listed here:
6
+ --
7
+ -- https://wiki.postgresql.org/wiki/Audit_trigger_91plus
8
+ --
9
+ -- This trigger was originally based on
10
+ -- http://wiki.postgresql.org/wiki/Audit_trigger
11
+ -- but has been completely rewritten.
12
+ --
13
+ -- Should really be converted into a relocatable EXTENSION, with control and upgrade files.
14
+
15
+ CREATE EXTENSION IF NOT EXISTS hstore;
16
+
17
+ <% if true %>
18
+ DROP SCHEMA IF EXISTS tracker CASCADE;
19
+ <% end %>
20
+ CREATE SCHEMA tracker;
21
+ REVOKE ALL ON SCHEMA tracker FROM public;
22
+
23
+ COMMENT ON SCHEMA tracker IS 'Out-of-table tracing';
24
+
25
+ CREATE TYPE tracker.actions AS ENUM(
26
+ 'INSERT', 'UPDATE', 'DELETE'
27
+ );
28
+
29
+ --
30
+ -- Audited data. Lots of information is available, it's just a matter of how much
31
+ -- you really want to record. See:
32
+ --
33
+ -- http://www.postgresql.org/docs/9.1/static/functions-info.html
34
+ --
35
+ -- Remember, every column you add takes up more tracker table space and slows tracker
36
+ -- inserts.
37
+ --
38
+ -- Every index you add has a big impact too, so avoid adding indexes to the
39
+ -- tracker table unless you REALLY need them. The hstore GIST indexes are
40
+ -- particularly expensive.
41
+ --
42
+ -- It is sometimes worth copying the tracker table, or a coarse subset of it that
43
+ -- you're interested in, into a temporary table where you CREATE any useful
44
+ -- indexes and do your analysis.
45
+ --
46
+ CREATE TABLE tracker.events (
47
+ id bigserial primary key, -- id IS 'Unique identifier for each tracked event';
48
+ table_oid oid not null, -- table_oid IS 'Table OID. Changes with drop/create. Get with ''tablename''::regclass';
49
+ table_schema text not null, -- table_schema IS 'Database schema tracked table for this event is in';
50
+ table_name text not null, -- table_name IS 'Non-schema-qualified table name of table event occurred in';
51
+ action tracker.actions, -- event's action: INSERT, UPDATE, or DELETE
52
+ entity_pkey text not null, -- entity_pkey IS 'ROW primary key.';
53
+ row_data hstore, -- row_data IS 'Record value. For INSERT this is the new tuple. For DELETE and UPDATE it is the old tuple.';
54
+ changed_fields hstore, -- changed_fields IS 'New values of fields changed by UPDATE. Null except for UPDATE events.';
55
+ created_at TIMESTAMP WITH TIME ZONE NOT NULL, -- created_at IS 'Wall clock time at which tracked event''s trigger call occurred';
56
+ checked_out_at TIMESTAMP WITH TIME ZONE -- checked_out_at IS 'Wall clock time at which event is checked out for processing';
57
+
58
+ -- session_user_name text, -- session_user_name IS 'Login / session user whose statement caused the tracked event';
59
+ );
60
+
61
+
62
+ REVOKE ALL ON tracker.events FROM public;
63
+
64
+ /*
65
+
66
+ [TODO] - add comments on columns
67
+
68
+ */
69
+
70
+ COMMENT ON TABLE tracker.events IS 'History of tracked actions on tracked tables, from tracker.if_modified_func()';
71
+ -- COMMENT ON COLUMN tracker.events.session_user_name IS 'Login / session user whose statement caused the tracked event';
72
+
73
+ /*
74
+
75
+ [TODO] - create indices for common search patterns
76
+
77
+ */
78
+
79
+ -- CREATE INDEX events_table_oid_idx ON tracker.events(table_oid);
80
+ -- CREATE INDEX events_action_idx ON tracker.events(action);
81
+
82
+ /* [TODO] - change checked_out_at to default to 'infinity' */
83
+
84
+
85
+ /*
86
+
87
+ Track changes to a table at row level.
88
+
89
+ Optional parameters to trigger in CREATE TRIGGER call:
90
+
91
+ param 0: text, name of primary key column;
92
+ param 1: text[], columns to ignore in updates. Default [].
93
+
94
+ Updates to ignored cols are omitted from changed_fields.
95
+
96
+ Updates with only ignored cols changed are not inserted
97
+ into the tracker log.
98
+
99
+ Note that the user name logged is the login role for the session.
100
+ The tracker trigger cannot obtain the active role because it is reset
101
+ by the SECURITY DEFINER invocation of the tracker trigger itself.
102
+
103
+ */
104
+
105
+ CREATE OR REPLACE FUNCTION tracker.if_modified_func() RETURNS TRIGGER AS $body$
106
+ DECLARE
107
+ event tracker.events;
108
+ h_old hstore;
109
+ h_new hstore;
110
+ excluded_cols text[] = ARRAY[]::text[];
111
+ entity_pkey_name text;
112
+ BEGIN
113
+ IF TG_WHEN <> 'AFTER' THEN
114
+ RAISE EXCEPTION 'tracker.if_modified_func() may only run as an AFTER trigger';
115
+ END IF;
116
+
117
+ -- get args
118
+
119
+ entity_pkey_name = TG_ARGV[0];
120
+
121
+ IF TG_ARGV[1] IS NOT NULL THEN
122
+ excluded_cols = TG_ARGV[1]::text[];
123
+ END IF;
124
+
125
+ -- fill row
126
+
127
+ event = ROW(
128
+ nextval('tracker.events_id_seq'), -- id
129
+ TG_RELID, -- relation OID for much quicker searches
130
+ TG_TABLE_SCHEMA::text, -- table_schema
131
+ TG_TABLE_NAME::text, -- table_name
132
+ TG_OP::tracker.actions, -- 'INSERT'/'UPDATE'/'DELETE'
133
+ NULL, -- primary key
134
+ NULL, -- row_data
135
+ NULL, -- changed_fields
136
+ clock_timestamp(), -- created_at
137
+ NULL -- checked_out_at
138
+ );
139
+
140
+ IF (TG_OP = 'UPDATE') THEN
141
+ h_old = hstore(OLD.*);
142
+ h_new = hstore(NEW.*);
143
+
144
+ event.entity_pkey = h_old -> entity_pkey_name;
145
+ event.row_data = h_old - excluded_cols;
146
+ event.changed_fields = (h_new - event.row_data) - excluded_cols;
147
+ IF event.changed_fields = hstore('') THEN
148
+ -- All changed fields are ignored. Skip this update.
149
+ RETURN NULL;
150
+ END IF;
151
+ ELSIF (TG_OP = 'DELETE') THEN
152
+ h_old = hstore(OLD.*);
153
+ event.entity_pkey = h_old -> entity_pkey_name;
154
+ event.row_data = h_old - excluded_cols;
155
+ ELSIF (TG_OP = 'INSERT') THEN
156
+ h_new = hstore(NEW.*);
157
+ event.entity_pkey = h_new -> entity_pkey_name;
158
+ event.row_data = h_new - excluded_cols;
159
+ END IF;
160
+ INSERT INTO tracker.events VALUES (event.*);
161
+ RETURN NULL;
162
+ END;
163
+ $body$
164
+ LANGUAGE plpgsql
165
+ SECURITY DEFINER
166
+ SET search_path = pg_catalog, public;
167
+
168
+ /*
169
+
170
+ Add tracking support to a table.
171
+
172
+ Arguments:
173
+ target_table: Table name, schema qualified if not on search_path
174
+ primary_key_name: Name of primary key column
175
+ ignored_cols: Columns to exclude from update diffs, ignore updates that change only ignored cols.
176
+
177
+ */
178
+
179
+ CREATE OR REPLACE FUNCTION tracker.track_table(target_table regclass, entity_pkey_name text, ignored_cols text[])
180
+ RETURNS void AS $body$
181
+ DECLARE
182
+ _q_txt text;
183
+ _ignored_cols_snip text = '';
184
+ BEGIN
185
+ IF array_length(ignored_cols,1) > 0 THEN
186
+ _ignored_cols_snip = ', ' || quote_literal(ignored_cols);
187
+ END IF;
188
+
189
+ EXECUTE 'DROP TRIGGER IF EXISTS track_trigger_row ON ' || quote_ident(target_table::TEXT);
190
+
191
+ _q_txt = 'CREATE TRIGGER track_trigger_row AFTER INSERT OR UPDATE OR DELETE ON ' ||
192
+ quote_ident(target_table::TEXT) ||
193
+ ' FOR EACH ROW EXECUTE PROCEDURE tracker.if_modified_func(' ||
194
+ quote_literal(entity_pkey_name) ||
195
+ _ignored_cols_snip || ');';
196
+ EXECUTE _q_txt;
197
+ END;
198
+ $body$
199
+ language 'plpgsql';
200
+
201
+
202
+ /*
203
+
204
+ Add tracking support to the given table. No cols are ignored. (Shortcut)
205
+
206
+ */
207
+
208
+ CREATE OR REPLACE FUNCTION tracker.track_table(target_table regclass) RETURNS void AS $body$
209
+ SELECT tracker.track_table($1, 'id', ARRAY[]::text[]);
210
+ $body$ LANGUAGE 'sql';
211
+
212
+ COMMENT ON FUNCTION tracker.track_table(regclass) IS $body$
213
+ $body$;
214
+
215
+
216
+ --
217
+ ------------------------------------------------------------------------------------
218
+ --
219
+
220
+ -- SELECT tracker.track_table('public.posts');
221
+
222
+ --SELECT tracker.track_table('public.users', 'id', array['created_at','updated_at']);
223
+ -- -- SELECT tracker.track_table('public.users', ARRAY[]::text[]);
224
+ -- -- SELECT tracker.track_table('public.users');
225
+ --
226
+ -- INSERT INTO users (email, created_at, updated_at) VALUES('me@mo' || (1000 * random())::integer , NOW(), NOW());
227
+ -- UPDATE users SET username='mimi' WHERE email LIKE '%mo%';
228
+ -- UPDATE users SET username='momo' WHERE email LIKE '%mo%';
229
+ -- DELETE FROM users WHERE email LIKE '%mo%';
230
+ --
231
+ -- SELECT table_schema || '.' || table_name || '.' || entity_pkey, action, row_data, changed_fields, created_at FROM tracker.events;
data/lib/tracker.rb ADDED
@@ -0,0 +1,125 @@
1
+ require_relative "tracker/migration"
2
+ require_relative "tracker/registry"
3
+ require_relative "tracker/advisory_lock"
4
+
5
+ # module Tracker
6
+ # extend Registry
7
+ #
8
+ # def self.on(*args, &block)
9
+ # Registry.on(*args, &block)
10
+ # end
11
+ #
12
+ # def self.track(*args, &block)
13
+ # Registry.track(*args, &block)
14
+ # end
15
+ #
16
+ # extend Migration
17
+ #
18
+ # class Event < ActiveRecord::Base
19
+ # self.table_name = "tracker.events"
20
+ #
21
+ # include AdvisoryLock
22
+ #
23
+ # def self.track!(actions)
24
+ # where(id: actions.map(&:id)).update_all(checked_out_at: Time.now)
25
+ # end
26
+ # end
27
+ #
28
+ # def self.check_out_events
29
+ # Event.exclusive do
30
+ # event = Event.where("checked_out_at IS NULL OR checked_out_at < ?", Time.now - 10.minutes).order(:id).first
31
+ # return [] unless event
32
+ #
33
+ # if Registry.tracks?(event.table_name)
34
+ # events = Event.where(table_name: event.table_name, entity_pkey: event.entity_pkey).to_a
35
+ # else
36
+ # events = [ event ]
37
+ # end
38
+ #
39
+ # Event.where(id: events.map(&:id)).update_all(checked_out_at: Time.now)
40
+ # events
41
+ # end
42
+ # end
43
+ #
44
+ # def self.check_all!
45
+ # loop do
46
+ # events = check_out_events
47
+ # break if events.empty?
48
+ #
49
+ # events.each do |event|
50
+ # publish_event(event)
51
+ # end
52
+ #
53
+ # next if events.length == 1
54
+ #
55
+ # # generate a table level event if necessary:
56
+ # #
57
+ # # A table level event is the last event, but only if the last event
58
+ # # is not a delete where the accompanying "create" is also in the
59
+ # # "events" array. In that case it would be the first entry
60
+ #
61
+ # first_event = events.first
62
+ # last_event = events.last
63
+ # if first_event.action != "INSERT" && last_event.action != "DELETE"
64
+ # publish_event(last_event, event_name: last_event.table_name)
65
+ # end
66
+ #
67
+ # destroyed = Event.where(id: events.map(&:id)).delete_all
68
+ # puts "destroyed #{destroyed} events"
69
+ # end
70
+ # end
71
+ #
72
+ # def self.publish_event(event, event_name: nil)
73
+ # event_name ||= "#{event.table_name}.#{event.action.to_s.downcase}"
74
+ # Registry.publish! event_name, event.entity_pkey, event.row_data, event.changed_fields, event
75
+ # end
76
+ # end
77
+ #
78
+ # Tracker.migrate!
79
+ # Tracker.track_table! "public.posts"
80
+ #
81
+ # Tracker::Registry.reset!
82
+ #
83
+ # #
84
+ # # Is called after a posts entry was created
85
+ # Tracker.on "posts.insert" do |id, attrs|
86
+ # puts "--> created post ##{id} with attrs #{attrs.inspect}"
87
+ # end
88
+ #
89
+ # #
90
+ # # Is called after a posts entry was destroyed. attrs contains
91
+ # # the previous attributes of the posts table.
92
+ # Tracker.on "posts.delete" do |id, attrs|
93
+ # puts "--> deleted post ##{id} with attrs #{attrs.inspect}"
94
+ # end
95
+ #
96
+ # #
97
+ # # Is called whenever a post was changed. attrs contains the previous
98
+ # # set of attributes (the row before the update), changed_attrs contains the new values of the changed attributes.
99
+ # Tracker.on "posts.update" do |id, attrs, changed_attrs|
100
+ # puts "--> updated post ##{id} with attrs #{attrs.inspect}\n changed_attrs: #{changed_attrs.inspect}"
101
+ # end
102
+ #
103
+ # post = Post.create! body: "bobo", author: User.first, group: User.first.wall
104
+ # post.update! body: "yiha"
105
+ # post.update! body: "banzai"
106
+ # post.destroy
107
+ #
108
+ # Tracker.check_all!
109
+ #
110
+ # #
111
+ # # Is called with the latest attrs for the "posts" entry.
112
+ # Tracker.track "posts" do |id, attrs|
113
+ # puts "--> tracked post: id: #{id}, attrs: #{attrs}"
114
+ # end
115
+ #
116
+ # post = Post.create! body: "bobo", author: User.first, group: User.first.wall
117
+ # post.update! body: "yiha"
118
+ # post.update! body: "banzai"
119
+ # Tracker.check_all!
120
+ #
121
+ # post = Post.create! body: "bobo", author: User.first, group: User.first.wall
122
+ # Tracker.check_all!
123
+ #
124
+ # post.destroy
125
+ # Tracker.check_all!
@@ -0,0 +1,77 @@
1
+ require "spec_helper"
2
+
3
+ describe "concurrency tests" do
4
+ # -- helper methods ---------------------------------------------------------
5
+
6
+ def processed_ids
7
+ File.read(LOG_FILE).split("\n").map(&:to_i)
8
+ end
9
+
10
+ def benchmark(msg, &block)
11
+ realtime = Benchmark.realtime(&block)
12
+ STDERR.puts "#{msg}: #{'%.3f secs' % realtime}"
13
+ realtime
14
+ end
15
+
16
+ LOG_FILE = "log/test-runner.log"
17
+
18
+ # Each runner writes the processed message into the LOG_FILE
19
+ def runner
20
+ ActiveRecord::Base.connection_pool.with_connection do |_conn|
21
+ log = File.open(LOG_FILE, "a")
22
+ queue = Postqueue.new
23
+ queue.on '*' do |_op, entity_ids|
24
+ sleep(0.0001); log.write "#{entity_ids.first}\n"
25
+ end
26
+ queue.process_until_empty
27
+ log.close
28
+ end
29
+ rescue => e
30
+ STDERR.puts "runner aborts: #{e}, from #{e.backtrace.first}"
31
+ end
32
+
33
+ def run_scenario(cnt, n_threads)
34
+ FileUtils.rm_rf LOG_FILE
35
+
36
+ queue = Postqueue.new do |queue|
37
+ # queue.default_batch_size = 10
38
+ end
39
+
40
+ benchmark "enqueuing #{cnt} ops" do
41
+ queue.enqueue op: "myop", entity_id: (1..cnt)
42
+ end
43
+
44
+ benchmark "processing #{cnt} ops with #{n_threads} threads" do
45
+ if n_threads == 0
46
+ runner
47
+ else
48
+ Array.new(n_threads) { Thread.new { runner } }.each(&:join)
49
+ end
50
+ end
51
+ end
52
+
53
+ # -- tests start here -------------------------------------------------------
54
+
55
+ it "runs faster with multiple runners", transactions: false do
56
+ # For small cnt values here the test below actually fails.
57
+ cnt = 1000
58
+
59
+ t0_runtime = run_scenario cnt, 0
60
+ expect(processed_ids).to contain_exactly(*(1..cnt).to_a)
61
+
62
+ t4_runtime = run_scenario cnt, 4
63
+ expect(processed_ids).to contain_exactly(*(1..cnt).to_a)
64
+ expect(t4_runtime).to be < t0_runtime * 0.8
65
+ end
66
+
67
+ it "enqueues many entries" do
68
+ cnt = 1000
69
+
70
+ queue = Postqueue.new do |queue|
71
+ # queue.default_batch_size = 10
72
+ end
73
+ benchmark "enqueuing #{cnt} ops" do
74
+ queue.enqueue op: "myop", entity_id: (1..cnt)
75
+ end
76
+ end
77
+ end
@@ -1,8 +1,9 @@
1
1
  require "spec_helper"
2
2
 
3
3
  describe "enqueuing" do
4
- let(:queue) { Postqueue::Base.new }
5
- let(:item) { queue.items.first }
4
+ let(:queue) { Postqueue.new }
5
+ let(:items) { queue.item_class.all }
6
+ let(:item) { queue.item_class.first }
6
7
 
7
8
  context "when enqueueing entries" do
8
9
  before do
@@ -20,39 +21,4 @@ describe "enqueuing" do
20
21
  expect(item.failed_attempts).to eq(0)
21
22
  end
22
23
  end
23
-
24
- context "when enqueueing identical duplicate entries" do
25
- before do
26
- queue.enqueue op: "duplicate", entity_id: 12, duplicate: duplicate
27
- queue.enqueue op: "duplicate", entity_id: 13, duplicate: duplicate
28
- queue.enqueue op: "duplicate", entity_id: 12, duplicate: duplicate
29
- queue.enqueue op: "duplicate", entity_id: 12, duplicate: duplicate
30
- queue.enqueue op: "duplicate", entity_id: 12, duplicate: duplicate
31
- queue.enqueue op: "no-duplicate", entity_id: 13, duplicate: duplicate
32
- end
33
-
34
- context "when duplicates are permitted" do
35
- let(:duplicate) { true }
36
-
37
- it "does not skip duplicates" do
38
- expect(items.map(&:entity_id)).to eq([12, 13, 12, 12, 12, 13])
39
- end
40
- end
41
-
42
- context "when duplicates are not permitted" do
43
- let(:duplicate) { false }
44
-
45
- it "skips later duplicates" do
46
- expect(items.map(&:entity_id)).to eq([12, 13, 13])
47
- end
48
- end
49
- end
50
-
51
- context "when enqueueing many entries" do
52
- it "adds all entries skipping duplicates" do
53
- queue.enqueue op: "duplicate", entity_id: 12, duplicate: false
54
- queue.enqueue op: "duplicate", entity_id: [13, 12, 12, 13, 14], duplicate: false
55
- expect(items.map(&:entity_id)).to eq([12, 13, 14])
56
- end
57
- end
58
24
  end
@@ -0,0 +1,66 @@
1
+ require "spec_helper"
2
+
3
+ describe "idempotent operations" do
4
+ let(:queue) do
5
+ Postqueue.new do |queue|
6
+ queue.batch_sizes["batchable"] = 10
7
+ queue.idempotent_operation "idempotent"
8
+ end
9
+ end
10
+
11
+ let(:items) { queue.item_class.all }
12
+ let(:item) { queue.item_class.first }
13
+
14
+ context "when enqueueing many entries" do
15
+ before do
16
+ queue.enqueue op: "idempotent", entity_id: 12
17
+ queue.enqueue op: "idempotent", entity_id: 13
18
+ queue.enqueue op: "idempotent", entity_id: 12
19
+ queue.enqueue op: "idempotent", entity_id: 12
20
+ queue.enqueue op: "idempotent", entity_id: 12
21
+ queue.enqueue op: "no-duplicate", entity_id: 14
22
+ queue.enqueue op: "no-duplicate", entity_id: 14
23
+ end
24
+
25
+ it "does not skip non-duplicates" do
26
+ entity_ids = items.select { |i| i.op == "no-duplicate" }.map(&:entity_id)
27
+ expect(entity_ids).to eq([14, 14])
28
+ end
29
+
30
+ it "skips duplicates" do
31
+ entity_ids = items.select { |i| i.op == "idempotent" }.map(&:entity_id)
32
+ expect(entity_ids).to eq([12, 13])
33
+ end
34
+ end
35
+
36
+ context "when enqueueing many entries" do
37
+ it "skips duplicates in entries" do
38
+ queue.enqueue op: "idempotent", entity_id: 12
39
+ queue.enqueue op: "idempotent", entity_id: [13, 12, 12, 13, 14]
40
+ queue.enqueue op: "idempotent", entity_id: 14
41
+ expect(items.map(&:entity_id)).to eq([12, 13, 14])
42
+ end
43
+ end
44
+
45
+ context "when processing entries" do
46
+ let(:callback_invocations) { @callback_invocations ||= [] }
47
+
48
+ before do
49
+ queue.enqueue op: "idempotent", entity_id: 12
50
+ queue.item_class.insert_item(op: "idempotent", entity_id: 12)
51
+
52
+ queue.on "idempotent" do |op, entity_ids|
53
+ callback_invocations << [ op, entity_ids ]
54
+ end
55
+ queue.process
56
+ end
57
+
58
+ it "runs the process callback only once" do
59
+ expect(callback_invocations.length).to eq(1)
60
+ end
61
+
62
+ it "removes all items" do
63
+ expect(items.count).to eq(0)
64
+ end
65
+ end
66
+ end