worker_plugins 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46a8d25556ad9d41f62307376bf457a790ab3e67e934678cac0ad9b754112e8e
4
- data.tar.gz: 2163df96c0f85a9102f1694fbeda59f06f70c296924e0a9dd9320a0b239e0f15
3
+ metadata.gz: 343e7440c6234a58f501c96d553f019c653c7eb32c8dc07847d4f02d9ee8020b
4
+ data.tar.gz: 5100e6c819ee4f818082e87ef515e7362523b7e1c5b0c25ca386741e07e9d8ec
5
5
  SHA512:
6
- metadata.gz: ef92d06893d7b9a1e56ade70c4b78432c0cd3b04544db92c5827865883d97b9912fc392930e53d26324c6bb6dbec23481e101fb4ba0218c51a2e52ef1634c497
7
- data.tar.gz: 24c4313891e0864b95695af807eca5586739c997039cc3bb54b82050964af2dbd92806c90b3e7077d8acdd93f5b636dd306e6300331961b41ef71d424f8a099a
6
+ metadata.gz: a658e1f90e8f9c33503e526b98a59ef578e9a366dc388d79abbf8efec602c60fe5c920afa4e0772a113de5e47f4d71eba4bd7346dfc38dcbefd31ac7808deca1
7
+ data.tar.gz: c73827b981b2d50abc9f5bd3c388645fa382f0d052188f3ff85f5f3e0e8ac1d47e62ba01a2fa73a2d1e6d4c78475870f36cbb520723154491bffe44245536db6
data/README.md CHANGED
@@ -27,6 +27,46 @@ Optimally loop over resources on a workspace:
27
27
  workspace.each_resource(types: ['User']) do |user|
28
28
  ```
29
29
 
30
+ ## Scheduled cleanup of unused workplaces
31
+
32
+ `WorkerPlugins::DeleteOldWorkplaces` removes workplaces that have seen no activity within a given window: the workplace row's `updated_at` must be older than the cutoff *and* none of its links may have been created or updated since. The stale workplaces and their links are deleted in batches with raw `delete_all`, which skips per-row callbacks.
33
+
34
+ ```ruby
35
+ result = WorkerPlugins::DeleteOldWorkplaces.execute!(older_than: 2.months)
36
+ # => {workplaces_deleted: <N>, links_deleted: <M>}
37
+ ```
38
+
39
+ Options:
40
+
41
+ - `older_than:` (required) — any object that responds to `.ago` (typically an `ActiveSupport::Duration` like `2.months` or `30.days`). The service computes the cutoff at call time.
42
+ - `batch_size:` (default `1000`) — how many stale workplaces (together with their links) to delete per batch.
43
+
44
+ The gem does not register a scheduler of its own. Wire the service into your application's background queue. Example with `sidekiq-scheduler`:
45
+
46
+ ```yaml
47
+ # config/sidekiq.yml
48
+ :scheduler:
49
+ :schedule:
50
+ DeleteOldWorkplaces:
51
+ cron: "0 40 3 * * *" # daily at 03:40 local time
52
+ args: ["WorkerPlugins::DeleteOldWorkplaces", {"older_than": "2.months"}]
53
+ class: ServiceScheduler # or whatever your project's service-dispatching worker is called
54
+ queue: low_priority
55
+ ```
56
+
57
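+ The `older_than` value in the example above reaches the worker as the plain string `"2.months"`, which does not respond to `.ago`. If your `ServiceScheduler` only accepts YAML-serializable arguments and does not convert them into durations, wrap the call in a thin application-side service: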
58
+
59
+ ```ruby
60
+ class Workplaces::DeleteOld < ApplicationService
61
+ def perform
62
+ WorkerPlugins::DeleteOldWorkplaces.execute!(older_than: 2.months)
63
+ succeed!
64
+ end
65
+ end
66
+ ```
67
+
68
+ and schedule `Workplaces::DeleteOld` instead.
69
+
30
70
  ## Release
31
71
 
32
72
  Run the release task from a clean worktree:
@@ -36,4 +36,22 @@ class WorkerPlugins::ApplicationService < ServicePattern::Service
36
36
 
37
37
  adapter_name.include?("mysql") || adapter_name.include?("trilogy")
38
38
  end
39
+
40
+ # True when a relation applies no row-narrowing scope — so filtering
41
+ # workplace links with `resource_id IN (SELECT ... FROM <target_table>)` adds
42
+ # no semantic value and just forces the database to materialize every row
43
+ # of the target model (e.g. 340k+ users). On such relations RemoveQuery
44
+ # deletes by `resource_type` alone, while QueryLinksStatus replaces the
45
+ # subquery with a primary-key join on the target table before counting.
46
+ def relation_unscoped?(relation)
47
+ relation.where_clause.empty? &&
48
+ relation.joins_values.empty? &&
49
+ relation.left_outer_joins_values.empty? &&
50
+ relation.group_values.empty? &&
51
+ relation.having_clause.empty? &&
52
+ relation.limit_value.nil? &&
53
+ relation.offset_value.nil? &&
54
+ relation.from_clause.value.nil? &&
55
+ relation.with_values.empty?
56
+ end
39
57
  end
@@ -0,0 +1,48 @@
1
+ class WorkerPlugins::DeleteOldWorkplaces < WorkerPlugins::ApplicationService
2
+ arguments :older_than
3
+ argument :batch_size, default: 1_000
4
+
5
+ # Deletes workplaces that haven't seen any activity since `older_than.ago`:
6
+ # the workplace's own `updated_at` is older than the cutoff *and* none of
7
+ # its links have an `updated_at` at or after it. Links on deleted workplaces
8
+ # are normally removed with the parent via `dependent: :destroy`; this service
9
+ # instead deletes the links and then the workplaces with raw `delete_all` in
10
+ # batches, skipping per-row callbacks to keep long-running cleanup jobs cheap.
11
+ #
12
+ # Succeeds with the `workplaces_deleted` / `links_deleted` row counts. Intended
13
+ # to be scheduled by the consumer application from a Sidekiq worker (or
14
+ # equivalent) — the gem does not register a scheduler of its own.
15
+ def perform
16
+ cutoff = older_than.ago # computed once, when the service runs
17
+ workplaces_deleted = 0
18
+ links_deleted = 0
19
+
20
+ stale_workplaces(cutoff).in_batches(of: batch_size) do |batch|
21
+ batch_ids = batch.pluck(:id) # pin the ids so both deletes below target the same workplaces
22
+
23
+ links_deleted += WorkerPlugins::WorkplaceLink # links first, then the workplaces that own them
24
+ .where(workplace_id: batch_ids)
25
+ .delete_all
26
+ workplaces_deleted += WorkerPlugins::Workplace
27
+ .where(id: batch_ids)
28
+ .delete_all
29
+ end
30
+
31
+ succeed!(workplaces_deleted:, links_deleted:)
32
+ end
33
+
34
+ def stale_workplaces(cutoff) # relation of workplaces with no row or link activity since `cutoff`
35
+ workplaces_table = quote_table(WorkerPlugins::Workplace.table_name)
36
+ links_table = quote_table(WorkerPlugins::WorkplaceLink.table_name)
37
+
38
+ WorkerPlugins::Workplace
39
+ .where("#{workplaces_table}.#{quote_column(:updated_at)} < ?", cutoff)
40
+ .where(<<~SQL.squish, cutoff)
41
+ NOT EXISTS (
42
+ SELECT 1 FROM #{links_table}
43
+ WHERE #{links_table}.#{quote_column(:workplace_id)} = #{workplaces_table}.#{quote_column(:id)}
44
+ AND #{links_table}.#{quote_column(:updated_at)} >= ?
45
+ )
46
+ SQL
47
+ end
48
+ end
@@ -2,12 +2,8 @@ class WorkerPlugins::QueryLinksStatus < WorkerPlugins::ApplicationService
2
2
  arguments :query, :workplace
3
3
 
4
4
  def perform
5
- checked_count = workplace
6
- .workplace_links
7
- .where(resource_type: query.klass.name, resource_id: query_with_selected_ids)
8
- .count
9
-
10
5
  query_count = query.count
6
+ checked_count = count_linked_rows
11
7
 
12
8
  succeed!(
13
9
  all_checked: query_count == checked_count,
@@ -17,6 +13,49 @@ class WorkerPlugins::QueryLinksStatus < WorkerPlugins::ApplicationService
17
13
  )
18
14
  end
19
15
 
16
+ def count_linked_rows # counts this workplace's links of the query's model type that match `query`
17
+ base_scope = workplace.workplace_links.where(resource_type: query.klass.name)
18
+
19
+ # When the query applies no scoping, the original `resource_id IN (SELECT
20
+ # DISTINCT <target_table>.id FROM <target_table>)` subquery materialized
21
+ # every row of the target model just to count — 2+ seconds on a 340k-row
22
+ # target. We drop the DISTINCT subquery and instead `INNER JOIN` the
23
+ # target table on its primary key so the composite index on links drives
24
+ # the scan and each matching link does a cheap PK probe to confirm the
25
+ # target row still exists. Orphaned links (whose target has since been
26
+ # deleted) are correctly excluded from the count, so `checked_count`
27
+ # never exceeds `query_count`.
28
+ return base_scope.joins(unscoped_target_join_sql).count if relation_unscoped?(query)
29
+
30
+ base_scope.where(resource_id: query_with_selected_ids).count # scoped query: keep the `resource_id` filter
31
+ end
32
+
33
+ def unscoped_target_join_sql # INNER JOIN fragment: target primary key = links.resource_id (cast where required)
34
+ target_table = quote_table(query.klass.table_name)
35
+ target_pk = "#{target_table}.#{quote_column(query.klass.primary_key)}"
36
+ resource_id_column = "#{quote_table(WorkerPlugins::WorkplaceLink.table_name)}.#{quote_column(:resource_id)}"
37
+
38
+ "INNER JOIN #{target_table} ON #{target_pk} = #{resource_id_expression_for_join(resource_id_column)}"
39
+ end
40
+
41
+ # On MySQL / MariaDB and SQLite, implicit conversion handles comparing the
42
+ # target's primary key against the VARCHAR `resource_id` column. Postgres
43
+ # is strict about types, so `resource_id` is cast when the key type differs and is :uuid or :integer.
44
+ def resource_id_expression_for_join(resource_id_column)
45
+ return resource_id_column unless postgres?
46
+
47
+ target_pk_type = query.klass.column_for_attribute(query.klass.primary_key).type
48
+ resource_id_type = WorkerPlugins::WorkplaceLink.column_for_attribute(:resource_id).type
49
+
50
+ return resource_id_column if target_pk_type == resource_id_type # same type: no cast needed
51
+
52
+ case target_pk_type
53
+ when :uuid then "CAST(#{resource_id_column} AS UUID)"
54
+ when :integer then "CAST(#{resource_id_column} AS BIGINT)"
55
+ else resource_id_column # any other key type is compared uncast
56
+ end
57
+ end
58
+
20
59
  def query_with_selected_ids
21
60
  WorkerPlugins::SelectColumnWithTypeCast.execute!(
22
61
  column_name_to_select: query.klass.primary_key,
@@ -7,33 +7,17 @@ class WorkerPlugins::RemoveQuery < WorkerPlugins::ApplicationService
7
7
 
8
8
  def links_scope
9
9
  scope = workplace.workplace_links.where(resource_type: model_class.name)
10
- return scope if unscoped_query?
10
+ # When the caller's query applies no scoping, the `resource_id IN (SELECT
11
+ # ... FROM <target_table>)` subquery would materialize every row of the
12
+ # target model — the `resource_type = ?` filter alone is enough. Orphaned
13
+ # links (whose resource row has since been deleted) are deleted alongside
14
+ # live ones, which matches caller intent ("remove everything matching")
15
+ # and is the correct thing to do with dead references anyway.
16
+ return scope if relation_unscoped?(@query)
11
17
 
12
18
  scope.where(resource_id: query_with_selected_ids)
13
19
  end
14
20
 
15
- # If the caller's query has no meaningful scoping applied, the `resource_id
16
- # IN (SELECT ... FROM <target_table>)` subquery would simply materialize
17
- # every row of the target model — for 340k+ users that's a full-table scan
18
- # with no semantic effect other than preserving orphaned links. The
19
- # `resource_type = ?` filter alone is enough to pin the DELETE to this
20
- # workplace's links of the given type, so we short-circuit the subquery in
21
- # that case. Orphaned links (whose resource row has since been deleted) are
22
- # deleted alongside live ones, which matches caller intent ("remove
23
- # everything matching the query") and is the correct thing to do with
24
- # dead references anyway.
25
- def unscoped_query?
26
- @query.where_clause.empty? &&
27
- @query.joins_values.empty? &&
28
- @query.left_outer_joins_values.empty? &&
29
- @query.group_values.empty? &&
30
- @query.having_clause.empty? &&
31
- @query.limit_value.nil? &&
32
- @query.offset_value.nil? &&
33
- @query.from_clause.value.nil? &&
34
- @query.with_values.empty?
35
- end
36
-
37
21
  def model_class
38
22
  query.klass
39
23
  end
@@ -1,3 +1,3 @@
1
1
  module WorkerPlugins
2
- VERSION = "0.0.15".freeze
2
+ VERSION = "0.0.16".freeze
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: worker_plugins
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.15
4
+ version: 0.0.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kasper Stöckel
@@ -26,6 +26,7 @@ files:
26
26
  - app/models/worker_plugins/workplace_link.rb
27
27
  - app/services/worker_plugins/add_query.rb
28
28
  - app/services/worker_plugins/application_service.rb
29
+ - app/services/worker_plugins/delete_old_workplaces.rb
29
30
  - app/services/worker_plugins/query_links_status.rb
30
31
  - app/services/worker_plugins/remove_query.rb
31
32
  - app/services/worker_plugins/select_column_with_type_cast.rb