source_monitor 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/agent-memory/vbw-vbw-debugger/MEMORY.md +15 -0
- data/.claude/skills/sm-configuration-setting/reference/settings-catalog.md +3 -3
- data/.claude/skills/sm-configure/reference/configuration-reference.md +3 -3
- data/.claude/skills/sm-domain-model/SKILL.md +2 -2
- data/.claude/skills/sm-domain-model/reference/table-structure.md +3 -1
- data/.claude/skills/sm-engine-migration/SKILL.md +1 -1
- data/.claude/skills/sm-engine-migration/reference/migration-conventions.md +1 -1
- data/.claude/skills/sm-health-rule/SKILL.md +18 -21
- data/.claude/skills/sm-health-rule/reference/health-system.md +1 -1
- data/.claude/skills/sm-host-setup/reference/initializer-template.md +2 -2
- data/.claude/skills/sm-upgrade/reference/version-history.md +17 -12
- data/CHANGELOG.md +42 -0
- data/CLAUDE.md +2 -2
- data/Gemfile +1 -0
- data/Gemfile.lock +4 -1
- data/README.md +3 -3
- data/VERSION +1 -1
- data/app/assets/builds/source_monitor/application.css +132 -12
- data/app/assets/builds/source_monitor/application.js +25 -1
- data/app/assets/builds/source_monitor/application.js.map +2 -2
- data/app/assets/javascripts/source_monitor/controllers/modal_controller.js +8 -0
- data/app/assets/javascripts/source_monitor/controllers/select_all_controller.js +22 -2
- data/app/assets/stylesheets/source_monitor/application.tailwind.css +1 -1
- data/app/controllers/source_monitor/bulk_scrape_enablements_controller.rb +57 -0
- data/app/controllers/source_monitor/dashboard_controller.rb +10 -1
- data/app/controllers/source_monitor/import_history_dismissals_controller.rb +20 -0
- data/app/controllers/source_monitor/source_retries_controller.rb +10 -2
- data/app/controllers/source_monitor/source_scrape_tests_controller.rb +73 -0
- data/app/controllers/source_monitor/sources_controller.rb +51 -9
- data/app/helpers/source_monitor/application_helper.rb +24 -0
- data/app/helpers/source_monitor/health_badge_helper.rb +7 -20
- data/app/jobs/source_monitor/fetch_feed_job.rb +32 -3
- data/app/jobs/source_monitor/source_health_check_job.rb +1 -1
- data/app/models/source_monitor/fetch_log.rb +4 -0
- data/app/models/source_monitor/import_history.rb +2 -0
- data/app/models/source_monitor/source.rb +47 -2
- data/app/views/source_monitor/dashboard/_fetch_schedule.html.erb +94 -68
- data/app/views/source_monitor/dashboard/_scrape_recommendations.html.erb +17 -0
- data/app/views/source_monitor/dashboard/_stats.html.erb +19 -0
- data/app/views/source_monitor/dashboard/index.html.erb +7 -1
- data/app/views/source_monitor/import_sessions/health_check/_row.html.erb +2 -2
- data/app/views/source_monitor/shared/_pagination.html.erb +74 -0
- data/app/views/source_monitor/source_scrape_tests/_result.html.erb +81 -0
- data/app/views/source_monitor/source_scrape_tests/show.html.erb +60 -0
- data/app/views/source_monitor/sources/_bulk_scrape_enable_modal.html.erb +29 -0
- data/app/views/source_monitor/sources/_details.html.erb +19 -1
- data/app/views/source_monitor/sources/_empty_state_row.html.erb +1 -1
- data/app/views/source_monitor/sources/_import_history_panel.html.erb +12 -5
- data/app/views/source_monitor/sources/_row.html.erb +34 -6
- data/app/views/source_monitor/sources/index.html.erb +184 -132
- data/config/brakeman.ignore +11 -1
- data/config/routes.rb +5 -0
- data/db/migrate/20260305120000_add_dismissed_at_to_import_histories.rb +7 -0
- data/db/migrate/20260306233004_add_error_category_to_fetch_logs.rb +8 -0
- data/db/migrate/20260307120000_add_consecutive_fetch_failures_to_sources.rb +11 -0
- data/db/migrate/20260312120000_simplify_health_status_values.rb +20 -0
- data/docs/configuration.md +9 -1
- data/docs/troubleshooting.md +9 -0
- data/docs/upgrade.md +31 -0
- data/lib/generators/source_monitor/install/templates/source_monitor.rb.tt +2 -3
- data/lib/source_monitor/analytics/scrape_recommendations.rb +27 -0
- data/lib/source_monitor/configuration/health_settings.rb +0 -2
- data/lib/source_monitor/configuration/scraping_settings.rb +8 -1
- data/lib/source_monitor/dashboard/queries/stats_query.rb +12 -1
- data/lib/source_monitor/dashboard/queries.rb +6 -3
- data/lib/source_monitor/dashboard/recent_activity_presenter.rb +6 -5
- data/lib/source_monitor/dashboard/upcoming_fetch_schedule.rb +40 -54
- data/lib/source_monitor/favicons/discoverer.rb +16 -0
- data/lib/source_monitor/favicons/svg_converter.rb +60 -0
- data/lib/source_monitor/fetching/cloudflare_bypass.rb +79 -0
- data/lib/source_monitor/fetching/feed_fetcher/source_updater.rb +82 -2
- data/lib/source_monitor/fetching/feed_fetcher.rb +55 -1
- data/lib/source_monitor/fetching/fetch_error.rb +27 -0
- data/lib/source_monitor/fetching/fetch_runner.rb +4 -0
- data/lib/source_monitor/fetching/retry_policy.rb +4 -0
- data/lib/source_monitor/health/import_source_health_check.rb +3 -3
- data/lib/source_monitor/health/source_health_monitor.rb +9 -14
- data/lib/source_monitor/health/source_health_reset.rb +1 -1
- data/lib/source_monitor/pagination/paginator.rb +18 -1
- data/lib/source_monitor/version.rb +1 -1
- data/lib/source_monitor.rb +3 -0
- metadata +17 -1
|
@@ -3,9 +3,13 @@ import { Controller } from "@hotwired/stimulus";
|
|
|
3
3
|
export default class extends Controller {
|
|
4
4
|
static targets = ["panel"];
|
|
5
5
|
static classes = ["open"];
|
|
6
|
+
static values = { autoOpen: Boolean, removeOnClose: Boolean };
|
|
6
7
|
|
|
7
8
|
connect() {
|
|
8
9
|
this.handleEscape = this.handleEscape.bind(this);
|
|
10
|
+
if (this.autoOpenValue) {
|
|
11
|
+
this.open();
|
|
12
|
+
}
|
|
9
13
|
}
|
|
10
14
|
|
|
11
15
|
disconnect() {
|
|
@@ -35,6 +39,10 @@ export default class extends Controller {
|
|
|
35
39
|
}
|
|
36
40
|
|
|
37
41
|
this.teardown();
|
|
42
|
+
|
|
43
|
+
if (this.removeOnCloseValue) {
|
|
44
|
+
this.element.remove();
|
|
45
|
+
}
|
|
38
46
|
}
|
|
39
47
|
|
|
40
48
|
backdrop(event) {
|
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
import { Controller } from "@hotwired/stimulus";
|
|
2
2
|
|
|
3
3
|
export default class extends Controller {
|
|
4
|
-
static targets = ["master", "item"];
|
|
4
|
+
static targets = ["master", "item", "actionBar", "count"];
|
|
5
5
|
|
|
6
6
|
connect() {
|
|
7
7
|
this.syncMaster();
|
|
8
|
+
this.updateActionBar();
|
|
8
9
|
}
|
|
9
10
|
|
|
10
11
|
itemTargetConnected() {
|
|
11
12
|
this.syncMaster();
|
|
13
|
+
this.updateActionBar();
|
|
12
14
|
}
|
|
13
15
|
|
|
14
16
|
itemTargetDisconnected() {
|
|
15
17
|
this.syncMaster();
|
|
18
|
+
this.updateActionBar();
|
|
16
19
|
}
|
|
17
20
|
|
|
18
21
|
toggleAll(event) {
|
|
@@ -21,16 +24,33 @@ export default class extends Controller {
|
|
|
21
24
|
if (checkbox.disabled) return;
|
|
22
25
|
checkbox.checked = checked;
|
|
23
26
|
});
|
|
27
|
+
this.updateActionBar();
|
|
24
28
|
}
|
|
25
29
|
|
|
26
30
|
toggleItem() {
|
|
27
31
|
this.syncMaster();
|
|
32
|
+
this.updateActionBar();
|
|
28
33
|
}
|
|
29
34
|
|
|
30
35
|
syncMaster() {
|
|
31
36
|
if (!this.hasMasterTarget) return;
|
|
32
37
|
const selectable = this.itemTargets.filter((checkbox) => !checkbox.disabled);
|
|
33
|
-
const allChecked =
|
|
38
|
+
const allChecked =
|
|
39
|
+
selectable.length > 0 &&
|
|
40
|
+
selectable.every((checkbox) => checkbox.checked);
|
|
34
41
|
this.masterTarget.checked = allChecked;
|
|
35
42
|
}
|
|
43
|
+
|
|
44
|
+
updateActionBar() {
|
|
45
|
+
if (!this.hasActionBarTarget) return;
|
|
46
|
+
const checkedCount = this.itemTargets.filter((cb) => cb.checked).length;
|
|
47
|
+
if (this.hasCountTarget) {
|
|
48
|
+
this.countTarget.textContent = checkedCount;
|
|
49
|
+
}
|
|
50
|
+
if (checkedCount > 0) {
|
|
51
|
+
this.actionBarTarget.classList.remove("hidden");
|
|
52
|
+
} else {
|
|
53
|
+
this.actionBarTarget.classList.add("hidden");
|
|
54
|
+
}
|
|
55
|
+
}
|
|
36
56
|
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
class BulkScrapeEnablementsController < ApplicationController
|
|
5
|
+
def create
|
|
6
|
+
source_ids = Array(params.dig(:bulk_scrape_enablement, :source_ids)).map(&:to_i).reject(&:zero?)
|
|
7
|
+
|
|
8
|
+
if source_ids.empty?
|
|
9
|
+
handle_empty_selection
|
|
10
|
+
return
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
sources = Source.where(id: source_ids, scraping_enabled: false)
|
|
14
|
+
updated_count = sources.update_all(
|
|
15
|
+
scraping_enabled: true,
|
|
16
|
+
auto_scrape: true,
|
|
17
|
+
scraper_adapter: default_adapter,
|
|
18
|
+
updated_at: Time.current
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
respond_to do |format|
|
|
22
|
+
format.turbo_stream do
|
|
23
|
+
responder = SourceMonitor::TurboStreams::StreamResponder.new
|
|
24
|
+
responder.toast(
|
|
25
|
+
message: "Scraping enabled for #{updated_count} #{'source'.pluralize(updated_count)}.",
|
|
26
|
+
level: :success
|
|
27
|
+
)
|
|
28
|
+
responder.redirect(source_monitor.sources_path)
|
|
29
|
+
render turbo_stream: responder.render(view_context)
|
|
30
|
+
end
|
|
31
|
+
format.html do
|
|
32
|
+
redirect_to source_monitor.sources_path,
|
|
33
|
+
notice: "Scraping enabled for #{updated_count} #{'source'.pluralize(updated_count)}."
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
def default_adapter
|
|
41
|
+
"readability"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def handle_empty_selection
|
|
45
|
+
respond_to do |format|
|
|
46
|
+
format.turbo_stream do
|
|
47
|
+
responder = SourceMonitor::TurboStreams::StreamResponder.new
|
|
48
|
+
responder.toast(message: "No sources selected.", level: :warning)
|
|
49
|
+
render turbo_stream: responder.render(view_context), status: :unprocessable_entity
|
|
50
|
+
end
|
|
51
|
+
format.html do
|
|
52
|
+
redirect_to source_monitor.sources_path, alert: "No sources selected."
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -17,11 +17,20 @@ module SourceMonitor
|
|
|
17
17
|
).to_a
|
|
18
18
|
@job_adapter = SourceMonitor::Jobs::Visibility.adapter_name
|
|
19
19
|
@job_metrics = queries.job_metrics
|
|
20
|
-
|
|
20
|
+
@schedule_pages = schedule_pages_params
|
|
21
|
+
fetch_schedule = queries.upcoming_fetch_schedule(pages: @schedule_pages)
|
|
21
22
|
@fetch_schedule_groups = fetch_schedule.groups
|
|
22
23
|
@fetch_schedule_reference_time = fetch_schedule.reference_time
|
|
24
|
+
@scrape_candidates_count = @stats[:scrape_candidates_count]
|
|
25
|
+
@scrape_recommendation_threshold = SourceMonitor.config.scraping.scrape_recommendation_threshold
|
|
23
26
|
@mission_control_enabled = SourceMonitor.mission_control_enabled?
|
|
24
27
|
@mission_control_dashboard_path = SourceMonitor.mission_control_dashboard_path
|
|
25
28
|
end
|
|
29
|
+
|
|
30
|
+
private
|
|
31
|
+
|
|
32
|
+
def schedule_pages_params
|
|
33
|
+
params.fetch(:schedule_pages, {}).permit!.to_h
|
|
34
|
+
end
|
|
26
35
|
end
|
|
27
36
|
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
class ImportHistoryDismissalsController < ApplicationController
|
|
5
|
+
def create
|
|
6
|
+
import_history = ImportHistory.find(params[:import_history_id])
|
|
7
|
+
import_history.update!(dismissed_at: Time.current)
|
|
8
|
+
|
|
9
|
+
respond_to do |format|
|
|
10
|
+
format.turbo_stream do
|
|
11
|
+
render turbo_stream: turbo_stream.remove("source_monitor_import_history_panel")
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
format.html do
|
|
15
|
+
redirect_to source_monitor.sources_path, notice: "Import dismissed"
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -7,8 +7,16 @@ module SourceMonitor
|
|
|
7
7
|
before_action :set_source
|
|
8
8
|
|
|
9
9
|
def create
|
|
10
|
-
SourceMonitor::Fetching::FetchRunner.enqueue(@source.id, force: true)
|
|
11
|
-
|
|
10
|
+
result = SourceMonitor::Fetching::FetchRunner.enqueue(@source.id, force: true)
|
|
11
|
+
|
|
12
|
+
if result == :already_fetching
|
|
13
|
+
render_fetch_enqueue_response(
|
|
14
|
+
"Fetch already in progress for this source. Please wait for the current fetch to complete.",
|
|
15
|
+
toast_level: :warning
|
|
16
|
+
)
|
|
17
|
+
else
|
|
18
|
+
render_fetch_enqueue_response("Retry has been forced and will run shortly.")
|
|
19
|
+
end
|
|
12
20
|
rescue StandardError => error
|
|
13
21
|
handle_fetch_failure(error)
|
|
14
22
|
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SourceMonitor
|
|
4
|
+
class SourceScrapeTestsController < ApplicationController
|
|
5
|
+
before_action :set_source
|
|
6
|
+
|
|
7
|
+
def create
|
|
8
|
+
item = pick_test_item
|
|
9
|
+
unless item
|
|
10
|
+
handle_no_item
|
|
11
|
+
return
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
result = SourceMonitor::Scraping::ItemScraper.new(item: item, source: @source).call
|
|
15
|
+
|
|
16
|
+
@test_result = {
|
|
17
|
+
item: item.reload,
|
|
18
|
+
scrape_result: result,
|
|
19
|
+
feed_word_count: item.item_content&.feed_word_count,
|
|
20
|
+
scraped_word_count: item.item_content&.scraped_word_count,
|
|
21
|
+
feed_content_preview: item.content.to_s.truncate(500),
|
|
22
|
+
scraped_content_preview: item.item_content&.scraped_content.to_s.truncate(500),
|
|
23
|
+
improvement: compute_improvement(item)
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
respond_to do |format|
|
|
27
|
+
format.turbo_stream do
|
|
28
|
+
render turbo_stream: [
|
|
29
|
+
turbo_stream.remove("scrape_test_modal_#{@source.id}"),
|
|
30
|
+
turbo_stream.append_all("body",
|
|
31
|
+
partial: "source_monitor/source_scrape_tests/result",
|
|
32
|
+
locals: { source: @source, test_result: @test_result })
|
|
33
|
+
]
|
|
34
|
+
end
|
|
35
|
+
format.html { render :show }
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def set_source
|
|
42
|
+
@source = Source.find(params[:source_id])
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def pick_test_item
|
|
46
|
+
@source.items
|
|
47
|
+
.joins(:item_content)
|
|
48
|
+
.where.not(sourcemon_item_contents: { feed_word_count: nil })
|
|
49
|
+
.order(published_at: :desc)
|
|
50
|
+
.first
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def handle_no_item
|
|
54
|
+
respond_to do |format|
|
|
55
|
+
format.turbo_stream do
|
|
56
|
+
responder = SourceMonitor::TurboStreams::StreamResponder.new
|
|
57
|
+
responder.toast(message: "No items with feed content available for test scrape.", level: :warning)
|
|
58
|
+
render turbo_stream: responder.render(view_context)
|
|
59
|
+
end
|
|
60
|
+
format.html do
|
|
61
|
+
redirect_to source_monitor.source_path(@source), alert: "No items available for test scrape."
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def compute_improvement(item)
|
|
67
|
+
feed = item.item_content&.feed_word_count.to_i
|
|
68
|
+
scraped = item.item_content&.scraped_word_count.to_i
|
|
69
|
+
return 0 if feed.zero?
|
|
70
|
+
((scraped - feed).to_f / feed * 100).round(1)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
@@ -20,29 +20,27 @@ module SourceMonitor
|
|
|
20
20
|
|
|
21
21
|
def index
|
|
22
22
|
@search_params = sanitized_search_params
|
|
23
|
-
|
|
23
|
+
expand_scrape_recommendation_filter
|
|
24
|
+
@q = build_search_query(params: @search_params)
|
|
24
25
|
|
|
25
|
-
paginator = SourceMonitor::Pagination::Paginator.new(
|
|
26
|
+
@paginator = SourceMonitor::Pagination::Paginator.new(
|
|
26
27
|
scope: @q.result,
|
|
27
28
|
page: params[:page],
|
|
28
29
|
per_page: params[:per_page] || PER_PAGE
|
|
29
30
|
).paginate
|
|
30
31
|
|
|
31
|
-
@sources = paginator.records
|
|
32
|
-
@page = paginator.page
|
|
33
|
-
@has_next_page = paginator.has_next_page
|
|
34
|
-
@has_previous_page = paginator.has_previous_page
|
|
32
|
+
@sources = @paginator.records
|
|
35
33
|
|
|
36
34
|
@search_term = @search_params[SEARCH_FIELD.to_s].to_s.strip
|
|
37
35
|
@search_field = SEARCH_FIELD
|
|
38
36
|
|
|
39
37
|
metrics = SourceMonitor::Analytics::SourcesIndexMetrics.new(
|
|
40
38
|
base_scope: Source.all,
|
|
41
|
-
result_scope: paginator.records,
|
|
39
|
+
result_scope: @paginator.records,
|
|
42
40
|
search_params: @search_params
|
|
43
41
|
)
|
|
44
42
|
|
|
45
|
-
@recent_import_histories = SourceMonitor::ImportHistory.recent_for(source_monitor_current_user&.id).limit(5)
|
|
43
|
+
@recent_import_histories = SourceMonitor::ImportHistory.not_dismissed.recent_for(source_monitor_current_user&.id).limit(5)
|
|
46
44
|
|
|
47
45
|
@fetch_interval_distribution = metrics.fetch_interval_distribution
|
|
48
46
|
@fetch_interval_filter = metrics.fetch_interval_filter
|
|
@@ -62,6 +60,8 @@ module SourceMonitor
|
|
|
62
60
|
@avg_feed_word_counts = {}
|
|
63
61
|
@avg_scraped_word_counts = {}
|
|
64
62
|
end
|
|
63
|
+
|
|
64
|
+
@scrape_candidate_ids = compute_scrape_candidate_ids
|
|
65
65
|
end
|
|
66
66
|
|
|
67
67
|
def show
|
|
@@ -90,8 +90,17 @@ module SourceMonitor
|
|
|
90
90
|
end
|
|
91
91
|
|
|
92
92
|
def update
|
|
93
|
+
scraping_was_disabled = !@source.scraping_enabled?
|
|
94
|
+
|
|
93
95
|
if @source.update(source_params)
|
|
94
|
-
|
|
96
|
+
notice = "Source updated successfully"
|
|
97
|
+
|
|
98
|
+
if scraping_was_disabled && @source.scraping_enabled?
|
|
99
|
+
enqueued = enqueue_unscraped_items(@source)
|
|
100
|
+
notice = "Auto-scraping enabled. #{enqueued} existing #{'item'.pluralize(enqueued)} queued for scraping."
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
redirect_to source_monitor.source_path(@source), notice: notice
|
|
95
104
|
else
|
|
96
105
|
render :edit, status: :unprocessable_entity
|
|
97
106
|
end
|
|
@@ -180,6 +189,39 @@ module SourceMonitor
|
|
|
180
189
|
end
|
|
181
190
|
end
|
|
182
191
|
|
|
192
|
+
def expand_scrape_recommendation_filter
|
|
193
|
+
return unless @search_params["scraping_enabled_eq"] == "recommend"
|
|
194
|
+
|
|
195
|
+
threshold = SourceMonitor.config.scraping.scrape_recommendation_threshold
|
|
196
|
+
@search_params.delete("scraping_enabled_eq")
|
|
197
|
+
@search_params["scraping_enabled_eq"] = "false"
|
|
198
|
+
@search_params["active_eq"] = "true"
|
|
199
|
+
@search_params["avg_feed_words_lt"] = threshold.to_s
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
def compute_scrape_candidate_ids
|
|
203
|
+
threshold = SourceMonitor.config.scraping.scrape_recommendation_threshold
|
|
204
|
+
return Set.new if threshold.nil? || threshold <= 0
|
|
205
|
+
|
|
206
|
+
candidate_ids = @sources.select do |source|
|
|
207
|
+
avg = @avg_feed_word_counts[source.id]
|
|
208
|
+
avg.present? && avg < threshold && !source.scraping_enabled?
|
|
209
|
+
end.map(&:id)
|
|
210
|
+
|
|
211
|
+
Set.new(candidate_ids)
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def enqueue_unscraped_items(source)
|
|
215
|
+
result = SourceMonitor::Scraping::BulkSourceScraper.new(
|
|
216
|
+
source: source,
|
|
217
|
+
selection: :unscraped
|
|
218
|
+
).call
|
|
219
|
+
result.enqueued_count
|
|
220
|
+
rescue StandardError => error
|
|
221
|
+
Rails.logger.warn("[SourceMonitor] Failed to enqueue unscraped items: #{error.message}") if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
222
|
+
0
|
|
223
|
+
end
|
|
224
|
+
|
|
183
225
|
def enqueue_favicon_fetch(source)
|
|
184
226
|
return unless defined?(ActiveStorage)
|
|
185
227
|
return unless SourceMonitor.config.favicons.enabled?
|
|
@@ -249,6 +249,30 @@ module SourceMonitor
|
|
|
249
249
|
end
|
|
250
250
|
end
|
|
251
251
|
|
|
252
|
+
def pagination_page_numbers(current_page:, total_pages:, window: 2)
|
|
253
|
+
return [ 1 ] if total_pages <= 1
|
|
254
|
+
|
|
255
|
+
# When total pages fit without gaps, show them all
|
|
256
|
+
if total_pages <= (2 * window) + 3
|
|
257
|
+
return (1..total_pages).to_a
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
pages = [ 1, total_pages ]
|
|
261
|
+
((current_page - window)..(current_page + window)).each do |p|
|
|
262
|
+
pages << p if p >= 1 && p <= total_pages
|
|
263
|
+
end
|
|
264
|
+
pages = pages.uniq.sort
|
|
265
|
+
|
|
266
|
+
result = []
|
|
267
|
+
last = 0
|
|
268
|
+
pages.each do |p|
|
|
269
|
+
result << :gap if p > last + 1
|
|
270
|
+
result << p
|
|
271
|
+
last = p
|
|
272
|
+
end
|
|
273
|
+
result
|
|
274
|
+
end
|
|
275
|
+
|
|
252
276
|
private
|
|
253
277
|
|
|
254
278
|
def external_link_icon
|
|
@@ -5,15 +5,13 @@ module SourceMonitor
|
|
|
5
5
|
def source_health_badge(source, override: nil)
|
|
6
6
|
return override if override.present?
|
|
7
7
|
|
|
8
|
-
status = source&.health_status.presence || "
|
|
8
|
+
status = source&.health_status.presence || "working"
|
|
9
9
|
|
|
10
10
|
mapping = {
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
"critical" => { label: "Failing", classes: "bg-rose-100 text-rose-700", show_spinner: false },
|
|
14
|
-
"declining" => { label: "Declining", classes: "bg-orange-100 text-orange-700", show_spinner: false },
|
|
11
|
+
"working" => { label: "Working", classes: "bg-green-100 text-green-700", show_spinner: false },
|
|
12
|
+
"declining" => { label: "Declining", classes: "bg-yellow-100 text-yellow-700", show_spinner: false },
|
|
15
13
|
"improving" => { label: "Improving", classes: "bg-sky-100 text-sky-700", show_spinner: false },
|
|
16
|
-
"
|
|
14
|
+
"failing" => { label: "Failing", classes: "bg-rose-100 text-rose-700", show_spinner: false },
|
|
17
15
|
"unknown" => { label: "Unknown", classes: "bg-slate-100 text-slate-600", show_spinner: false }
|
|
18
16
|
}
|
|
19
17
|
|
|
@@ -21,11 +19,11 @@ module SourceMonitor
|
|
|
21
19
|
end
|
|
22
20
|
|
|
23
21
|
def source_health_actions(source)
|
|
24
|
-
status = source&.health_status.presence || "
|
|
22
|
+
status = source&.health_status.presence || "working"
|
|
25
23
|
helpers = SourceMonitor::Engine.routes.url_helpers
|
|
26
24
|
|
|
27
25
|
case status
|
|
28
|
-
when "
|
|
26
|
+
when "failing", "declining"
|
|
29
27
|
[
|
|
30
28
|
{
|
|
31
29
|
key: :full_fetch,
|
|
@@ -44,17 +42,6 @@ module SourceMonitor
|
|
|
44
42
|
data: { testid: "source-health-action-health_check" }
|
|
45
43
|
}
|
|
46
44
|
]
|
|
47
|
-
when "auto_paused"
|
|
48
|
-
[
|
|
49
|
-
{
|
|
50
|
-
key: :reset,
|
|
51
|
-
label: "Reset to Active Status",
|
|
52
|
-
description: "Clears the pause window, failure counters, and schedules the next fetch using the configured interval.",
|
|
53
|
-
path: helpers.source_health_reset_path(source),
|
|
54
|
-
method: :post,
|
|
55
|
-
data: { testid: "source-health-action-reset" }
|
|
56
|
-
}
|
|
57
|
-
]
|
|
58
45
|
else
|
|
59
46
|
[]
|
|
60
47
|
end
|
|
@@ -63,7 +50,7 @@ module SourceMonitor
|
|
|
63
50
|
def interactive_health_status?(source, override: nil)
|
|
64
51
|
return false if override.present?
|
|
65
52
|
|
|
66
|
-
%w[
|
|
53
|
+
%w[failing declining].include?(source&.health_status.presence)
|
|
67
54
|
end
|
|
68
55
|
end
|
|
69
56
|
end
|
|
@@ -7,15 +7,22 @@ module SourceMonitor
|
|
|
7
7
|
|
|
8
8
|
source_monitor_queue :fetch
|
|
9
9
|
|
|
10
|
+
SCHEDULED_CONCURRENCY_MAX_ATTEMPTS = 5
|
|
11
|
+
|
|
10
12
|
discard_on ActiveJob::DeserializationError
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
|
|
14
|
+
rescue_from SourceMonitor::Fetching::FetchRunner::ConcurrencyError do |error|
|
|
15
|
+
handle_concurrency_error(error)
|
|
16
|
+
end
|
|
14
17
|
|
|
15
18
|
def perform(source_id, force: false)
|
|
19
|
+
@source_id = source_id
|
|
20
|
+
@force = force
|
|
21
|
+
|
|
16
22
|
source = SourceMonitor::Source.find_by(id: source_id)
|
|
17
23
|
return unless source
|
|
18
24
|
|
|
25
|
+
@source = source
|
|
19
26
|
return unless should_run?(source, force: force)
|
|
20
27
|
|
|
21
28
|
SourceMonitor::Fetching::FetchRunner.new(source: source, force: force).run
|
|
@@ -25,6 +32,28 @@ module SourceMonitor
|
|
|
25
32
|
|
|
26
33
|
private
|
|
27
34
|
|
|
35
|
+
def handle_concurrency_error(error)
|
|
36
|
+
if @force
|
|
37
|
+
log_force_fetch_skipped
|
|
38
|
+
@source&.update_columns(fetch_status: "idle") if @source&.fetch_status == "queued"
|
|
39
|
+
else
|
|
40
|
+
attempt = executions
|
|
41
|
+
if attempt < SCHEDULED_CONCURRENCY_MAX_ATTEMPTS
|
|
42
|
+
retry_job wait: FETCH_CONCURRENCY_RETRY_WAIT
|
|
43
|
+
else
|
|
44
|
+
raise error
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def log_force_fetch_skipped
|
|
50
|
+
return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
51
|
+
|
|
52
|
+
Rails.logger.info("[SourceMonitor::FetchFeedJob] Fetch already in progress for source #{@source_id}, skipping force-fetch")
|
|
53
|
+
rescue StandardError
|
|
54
|
+
nil
|
|
55
|
+
end
|
|
56
|
+
|
|
28
57
|
def should_run?(source, force:)
|
|
29
58
|
return true if force
|
|
30
59
|
|
|
@@ -12,11 +12,15 @@ module SourceMonitor
|
|
|
12
12
|
attribute :items_failed, :integer, default: 0
|
|
13
13
|
attribute :http_response_headers, default: -> { {} }
|
|
14
14
|
|
|
15
|
+
ERROR_CATEGORIES = %w[network parse blocked auth unknown].freeze
|
|
16
|
+
|
|
15
17
|
validates :source, presence: true
|
|
16
18
|
validates :items_created, :items_updated, :items_failed,
|
|
17
19
|
numericality: { greater_than_or_equal_to: 0 }
|
|
20
|
+
validates :error_category, inclusion: { in: ERROR_CATEGORIES }, allow_nil: true
|
|
18
21
|
|
|
19
22
|
scope :for_job, ->(job_id) { where(job_id:) }
|
|
23
|
+
scope :by_category, ->(category) { where(error_category: category) }
|
|
20
24
|
|
|
21
25
|
SourceMonitor::ModelExtensions.register(self, :fetch_log)
|
|
22
26
|
|
|
@@ -4,6 +4,8 @@ module SourceMonitor
|
|
|
4
4
|
class ImportHistory < ApplicationRecord
|
|
5
5
|
validates :user_id, presence: true
|
|
6
6
|
|
|
7
|
+
scope :not_dismissed, -> { where(dismissed_at: nil) }
|
|
8
|
+
|
|
7
9
|
scope :recent_for, lambda { |user_id|
|
|
8
10
|
scope = order(created_at: :desc)
|
|
9
11
|
scope = scope.where(user_id: user_id) if user_id
|
|
@@ -34,7 +34,7 @@ module SourceMonitor
|
|
|
34
34
|
attribute :custom_headers, default: -> { {} }
|
|
35
35
|
attribute :metadata, default: -> { {} }
|
|
36
36
|
attribute :fetch_status, :string, default: "idle"
|
|
37
|
-
attribute :health_status, :string, default: "
|
|
37
|
+
attribute :health_status, :string, default: "working"
|
|
38
38
|
|
|
39
39
|
sanitizes_string_attributes :name, :feed_url, :website_url, :scraper_adapter
|
|
40
40
|
sanitizes_hash_attributes :scrape_settings, :custom_headers, :metadata
|
|
@@ -61,9 +61,28 @@ module SourceMonitor
|
|
|
61
61
|
active.where(arel_table[:next_fetch_at].eq(nil).or(arel_table[:next_fetch_at].lteq(reference_time)))
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
+
def scrape_candidates(threshold: SourceMonitor.config.scraping.scrape_recommendation_threshold)
|
|
65
|
+
threshold_value = threshold.to_i
|
|
66
|
+
return none if threshold_value <= 0
|
|
67
|
+
|
|
68
|
+
active
|
|
69
|
+
.where(scraping_enabled: false)
|
|
70
|
+
.where(
|
|
71
|
+
"#{table_name}.id IN (
|
|
72
|
+
SELECT i.source_id
|
|
73
|
+
FROM #{Item.table_name} i
|
|
74
|
+
INNER JOIN #{ItemContent.table_name} ic ON ic.item_id = i.id
|
|
75
|
+
WHERE ic.feed_word_count IS NOT NULL
|
|
76
|
+
GROUP BY i.source_id
|
|
77
|
+
HAVING AVG(ic.feed_word_count) < ?
|
|
78
|
+
)", threshold_value
|
|
79
|
+
)
|
|
80
|
+
end
|
|
81
|
+
|
|
64
82
|
def ransackable_attributes(_auth_object = nil)
|
|
65
83
|
%w[name feed_url website_url created_at fetch_interval_minutes items_count last_fetched_at
|
|
66
|
-
active health_status feed_format scraper_adapter
|
|
84
|
+
active health_status feed_format scraper_adapter scraping_enabled
|
|
85
|
+
new_items_per_day avg_feed_words avg_scraped_words]
|
|
67
86
|
end
|
|
68
87
|
|
|
69
88
|
def ransackable_associations(_auth_object = nil)
|
|
@@ -71,6 +90,32 @@ module SourceMonitor
|
|
|
71
90
|
end
|
|
72
91
|
end
|
|
73
92
|
|
|
93
|
+
ransacker :new_items_per_day do
|
|
94
|
+
Arel.sql(
|
|
95
|
+
"(SELECT COUNT(*) / 30.0 FROM #{Item.table_name} i" \
|
|
96
|
+
" WHERE i.source_id = #{table_name}.id" \
|
|
97
|
+
" AND i.created_at >= NOW() - INTERVAL '30 days')"
|
|
98
|
+
)
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
ransacker :avg_feed_words do
|
|
102
|
+
Arel.sql(
|
|
103
|
+
"(SELECT AVG(ic.feed_word_count) FROM #{ItemContent.table_name} ic" \
|
|
104
|
+
" INNER JOIN #{Item.table_name} i ON i.id = ic.item_id" \
|
|
105
|
+
" WHERE i.source_id = #{table_name}.id" \
|
|
106
|
+
" AND ic.feed_word_count IS NOT NULL)"
|
|
107
|
+
)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
ransacker :avg_scraped_words do
|
|
111
|
+
Arel.sql(
|
|
112
|
+
"(SELECT AVG(ic.scraped_word_count) FROM #{ItemContent.table_name} ic" \
|
|
113
|
+
" INNER JOIN #{Item.table_name} i ON i.id = ic.item_id" \
|
|
114
|
+
" WHERE i.source_id = #{table_name}.id" \
|
|
115
|
+
" AND ic.scraped_word_count IS NOT NULL)"
|
|
116
|
+
)
|
|
117
|
+
end
|
|
118
|
+
|
|
74
119
|
def fetch_interval_minutes=(value)
|
|
75
120
|
self[:fetch_interval_minutes] = value.presence && value.to_i
|
|
76
121
|
end
|