prospector_engine 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +333 -0
- data/Rakefile +9 -0
- data/app/CLAUDE.md +43 -0
- data/app/assets/stylesheets/prospector/application.css +476 -0
- data/app/controllers/prospector/application_controller.rb +16 -0
- data/app/controllers/prospector/candidates_controller.rb +31 -0
- data/app/controllers/prospector/keyword_generations_controller.rb +10 -0
- data/app/controllers/prospector/keywords_controller.rb +38 -0
- data/app/controllers/prospector/run_bulk_approvals_controller.rb +13 -0
- data/app/controllers/prospector/run_cancellations_controller.rb +9 -0
- data/app/controllers/prospector/run_reclassifications_controller.rb +21 -0
- data/app/controllers/prospector/run_restarts_controller.rb +14 -0
- data/app/controllers/prospector/run_retries_controller.rb +14 -0
- data/app/controllers/prospector/runs_controller.rb +47 -0
- data/app/jobs/prospector/application_job.rb +5 -0
- data/app/jobs/prospector/bulk_approve_job.rb +14 -0
- data/app/jobs/prospector/classify_job.rb +17 -0
- data/app/jobs/prospector/fetch_job.rb +8 -0
- data/app/models/prospector/application_record.rb +6 -0
- data/app/models/prospector/candidate.rb +93 -0
- data/app/models/prospector/classification_run.rb +15 -0
- data/app/models/prospector/keyword.rb +16 -0
- data/app/models/prospector/run.rb +94 -0
- data/app/views/prospector/candidates/show.html.erb +63 -0
- data/app/views/prospector/keywords/index.html.erb +72 -0
- data/app/views/prospector/layouts/prospector.html.erb +38 -0
- data/app/views/prospector/runs/index.html.erb +33 -0
- data/app/views/prospector/runs/new.html.erb +109 -0
- data/app/views/prospector/runs/show.html.erb +111 -0
- data/config/routes.rb +15 -0
- data/db/prospector_schema.rb +81 -0
- data/lib/generators/prospector/install/install_generator.rb +31 -0
- data/lib/generators/prospector/install/templates/create_prospector_tables.rb +83 -0
- data/lib/generators/prospector/install/templates/prospector.rb +37 -0
- data/lib/prospector/CLAUDE.md +52 -0
- data/lib/prospector/classification/runner.rb +105 -0
- data/lib/prospector/configuration.rb +56 -0
- data/lib/prospector/engine.rb +18 -0
- data/lib/prospector/enrichment/contact_scraper.rb +188 -0
- data/lib/prospector/error.rb +8 -0
- data/lib/prospector/geography/base.rb +40 -0
- data/lib/prospector/geography/bounding_box.rb +58 -0
- data/lib/prospector/geography/city.rb +29 -0
- data/lib/prospector/geography/coordinates.rb +43 -0
- data/lib/prospector/geography/metro_area.rb +74 -0
- data/lib/prospector/geography/zip_code.rb +25 -0
- data/lib/prospector/keywords/generator.rb +74 -0
- data/lib/prospector/pipeline/normalizer.rb +57 -0
- data/lib/prospector/pipeline/orchestrator.rb +151 -0
- data/lib/prospector/sources/base.rb +13 -0
- data/lib/prospector/sources/google_places/adapter.rb +92 -0
- data/lib/prospector/sources/google_places/client.rb +58 -0
- data/lib/prospector/sources/google_places/us_address_validator.rb +24 -0
- data/lib/prospector/sources/result.rb +21 -0
- data/lib/prospector/version.rb +3 -0
- data/lib/prospector.rb +20 -0
- metadata +185 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
<%# Subscribe to live updates only when the turbo-rails view helper is actually available. The earlier guard tested the constant Turbo::StreamsFrom, which turbo-rails does not appear to define, so the subscription tag was never rendered. %>
<% if respond_to?(:turbo_stream_from) %>
|
|
2
|
+
<%= turbo_stream_from @run %>
|
|
3
|
+
<% end %>
|
|
4
|
+
|
|
5
|
+
<div class="prospector-flex prospector-flex-between">
|
|
6
|
+
<%# Fall back to the run id when the label is nil OR blank: an empty string is truthy in Ruby, so a plain `||` would render an empty heading. %>
<h1 class="prospector-heading"><%= @run.label.presence || "Run ##{@run.id}" %></h1>
|
|
7
|
+
<div class="prospector-flex prospector-gap-2">
|
|
8
|
+
<% if @run.cancellable? %>
|
|
9
|
+
<%= button_to "Cancel", run_cancellation_path(@run), method: :post, class: "prospector-btn prospector-btn-danger prospector-btn-sm" %>
|
|
10
|
+
<% end %>
|
|
11
|
+
<% if @run.retryable? %>
|
|
12
|
+
<%= button_to "Retry", run_retry_path(@run), method: :post, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
|
|
13
|
+
<% end %>
|
|
14
|
+
<% if @run.restartable? %>
|
|
15
|
+
<%= button_to "Restart", run_restart_path(@run), method: :post, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
|
|
16
|
+
<% end %>
|
|
17
|
+
</div>
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
<% if @run.error_messages.present? %>
|
|
21
|
+
<div class="prospector-card" style="border-left: 3px solid var(--p-danger); background: var(--p-danger-bg);">
|
|
22
|
+
<div class="prospector-subheading" style="color: var(--p-danger);">Error</div>
|
|
23
|
+
<pre style="font-family: var(--p-mono, monospace); font-size: 0.75rem; color: var(--p-text-secondary, #a1a1a8); white-space: pre-wrap; margin: 0;"><%= @run.error_messages %></pre>
|
|
24
|
+
</div>
|
|
25
|
+
<% end %>
|
|
26
|
+
|
|
27
|
+
<div class="prospector-stats">
|
|
28
|
+
<div class="prospector-stat">
|
|
29
|
+
<div class="prospector-stat-value"><%= @run.total_found %></div>
|
|
30
|
+
<div class="prospector-stat-label">Found</div>
|
|
31
|
+
</div>
|
|
32
|
+
<div class="prospector-stat">
|
|
33
|
+
<div class="prospector-stat-value"><%= @run.fetched_count %></div>
|
|
34
|
+
<div class="prospector-stat-label">Fetched</div>
|
|
35
|
+
</div>
|
|
36
|
+
<div class="prospector-stat">
|
|
37
|
+
<div class="prospector-stat-value"><%= @run.skipped_count %></div>
|
|
38
|
+
<div class="prospector-stat-label">Skipped</div>
|
|
39
|
+
</div>
|
|
40
|
+
<div class="prospector-stat">
|
|
41
|
+
<div class="prospector-stat-value"><span class="prospector-badge prospector-badge-<%= @run.status %>"><%= @run.status.titleize %></span></div>
|
|
42
|
+
<div class="prospector-stat-label">Status</div>
|
|
43
|
+
</div>
|
|
44
|
+
</div>
|
|
45
|
+
|
|
46
|
+
<% if @run.completed? || @run.classifying? %>
|
|
47
|
+
<div class="prospector-flex prospector-gap-2 prospector-mt-4" style="margin-bottom: 1rem;">
|
|
48
|
+
<%= button_to "Bulk Approve All", run_bulk_approval_path(@run), method: :post, class: "prospector-btn prospector-btn-primary prospector-btn-sm" %>
|
|
49
|
+
<%= button_to "Reclassify", run_reclassification_path(@run), method: :post, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
|
|
50
|
+
</div>
|
|
51
|
+
<% end %>
|
|
52
|
+
|
|
53
|
+
<div class="prospector-tabs">
|
|
54
|
+
<% %w[pending approved rejected].each do |filter| %>
|
|
55
|
+
<% count = @status_counts[filter] || 0 %>
|
|
56
|
+
<%= link_to run_path(@run, filter: filter),
|
|
57
|
+
class: "prospector-tab #{@filter == filter ? 'prospector-tab-active' : ''}" do %>
|
|
58
|
+
<%= filter.titleize %> (<%= count %>)
|
|
59
|
+
<% end %>
|
|
60
|
+
<% end %>
|
|
61
|
+
</div>
|
|
62
|
+
|
|
63
|
+
<div class="prospector-card">
|
|
64
|
+
<table class="prospector-table">
|
|
65
|
+
<thead>
|
|
66
|
+
<tr>
|
|
67
|
+
<th>Name</th>
|
|
68
|
+
<th>Address</th>
|
|
69
|
+
<th>Contact</th>
|
|
70
|
+
<th>Status</th>
|
|
71
|
+
<th>Actions</th>
|
|
72
|
+
</tr>
|
|
73
|
+
</thead>
|
|
74
|
+
<tbody>
|
|
75
|
+
<% @candidates.each do |candidate| %>
|
|
76
|
+
<tr>
|
|
77
|
+
<td>
|
|
78
|
+
<strong><%= candidate.name %></strong>
|
|
79
|
+
<% if candidate.website.present? %>
|
|
80
|
+
<%# The website value originates from an external data source. Only http(s) URLs are turned into links so that a javascript:/data: value cannot run on click; rel="noopener noreferrer" keeps the opened tab from reaching back through window.opener. %>
<% if candidate.website.match?(/\Ahttps?:\/\//i) %>
  <br><a href="<%= candidate.website %>" target="_blank" rel="noopener noreferrer" class="prospector-text-muted prospector-text-sm"><%= truncate(candidate.website, length: 40) %></a>
<% else %>
  <br><span class="prospector-text-muted prospector-text-sm"><%= truncate(candidate.website, length: 40) %></span>
<% end %>
|
|
81
|
+
<% end %>
|
|
82
|
+
</td>
|
|
83
|
+
<td class="prospector-text-sm"><%= candidate.address %></td>
|
|
84
|
+
<td class="prospector-text-sm">
|
|
85
|
+
<% if candidate.email.present? %>
|
|
86
|
+
<%= mail_to candidate.email, candidate.email, class: "prospector-text-sm" %><br>
|
|
87
|
+
<% end %>
|
|
88
|
+
<% if candidate.has_contact_info? %>
|
|
89
|
+
<span class="prospector-text-muted"><%= Prospector::Candidate::SOCIAL_FIELDS.count { |f| candidate[f].present? } %> social</span>
|
|
90
|
+
<% end %>
|
|
91
|
+
</td>
|
|
92
|
+
<td><span class="prospector-badge prospector-badge-<%= candidate.status %>"><%= candidate.status.titleize %></span></td>
|
|
93
|
+
<td>
|
|
94
|
+
<div class="prospector-flex prospector-gap-2">
|
|
95
|
+
<% if candidate.pending? && candidate.approvable? %>
|
|
96
|
+
<%= button_to "Approve", run_candidate_path(@run, candidate, status: "approved", return_filter: @filter), method: :patch, class: "prospector-btn prospector-btn-primary prospector-btn-sm" %>
|
|
97
|
+
<% end %>
|
|
98
|
+
<% if candidate.pending? %>
|
|
99
|
+
<%= button_to "Reject", run_candidate_path(@run, candidate, status: "rejected", return_filter: @filter), method: :patch, class: "prospector-btn prospector-btn-danger prospector-btn-sm" %>
|
|
100
|
+
<% end %>
|
|
101
|
+
<% if candidate.rejected? %>
|
|
102
|
+
<%= button_to "Restore", run_candidate_path(@run, candidate, status: "pending", return_filter: @filter), method: :patch, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
|
|
103
|
+
<% end %>
|
|
104
|
+
<%= link_to "Details", run_candidate_path(@run, candidate), class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
|
|
105
|
+
</div>
|
|
106
|
+
</td>
|
|
107
|
+
</tr>
|
|
108
|
+
<% end %>
|
|
109
|
+
</tbody>
|
|
110
|
+
</table>
|
|
111
|
+
</div>
|
data/config/routes.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Routes of the Prospector engine. Everything hangs off runs, keywords and
# keyword generations; the engine root shows the runs index.
Prospector::Engine.routes.draw do
  resources :runs, only: %i[index show new create] do
    # Each run-level action is a singular, create-only sub-resource that is
    # served by its own controller.
    {
      retry: "run_retries",
      restart: "run_restarts",
      cancellation: "run_cancellations",
      reclassification: "run_reclassifications",
      bulk_approval: "run_bulk_approvals"
    }.each do |action_name, controller_name|
      resource action_name, only: %i[create], controller: controller_name
    end

    resources :candidates, only: %i[show update]
  end

  resources :keywords, only: %i[index create update destroy]
  resources :keyword_generations, only: %i[create]

  root to: "runs#index"
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
ActiveRecord::Schema.define do
|
|
2
|
+
create_table :prospector_runs, if_not_exists: true do |t|
|
|
3
|
+
t.string :status, null: false, default: "pending"
|
|
4
|
+
t.string :source_adapter, null: false
|
|
5
|
+
t.string :geography_type, null: false
|
|
6
|
+
t.jsonb :geography_data, null: false, default: {}
|
|
7
|
+
t.string :categories, array: true, default: []
|
|
8
|
+
t.string :label
|
|
9
|
+
t.integer :actor_id
|
|
10
|
+
t.integer :total_found, default: 0
|
|
11
|
+
t.integer :fetched_count, default: 0
|
|
12
|
+
t.integer :skipped_count, default: 0
|
|
13
|
+
t.integer :error_count, default: 0
|
|
14
|
+
t.text :error_messages
|
|
15
|
+
t.jsonb :metadata, default: {}
|
|
16
|
+
t.datetime :started_at
|
|
17
|
+
t.datetime :completed_at
|
|
18
|
+
t.timestamps null: false
|
|
19
|
+
|
|
20
|
+
t.index :status
|
|
21
|
+
t.index :source_adapter
|
|
22
|
+
t.index :created_at
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
create_table :prospector_candidates, if_not_exists: true do |t|
|
|
26
|
+
t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
|
|
27
|
+
t.string :status, null: false, default: "pending"
|
|
28
|
+
t.string :name, null: false
|
|
29
|
+
t.string :address, null: false
|
|
30
|
+
t.decimal :latitude, precision: 10, scale: 6
|
|
31
|
+
t.decimal :longitude, precision: 10, scale: 6
|
|
32
|
+
t.string :phone_number
|
|
33
|
+
t.string :website
|
|
34
|
+
t.string :email
|
|
35
|
+
t.string :facebook_url
|
|
36
|
+
t.string :instagram_url
|
|
37
|
+
t.string :linkedin_url
|
|
38
|
+
t.string :tiktok_url
|
|
39
|
+
t.string :youtube_url
|
|
40
|
+
t.text :description
|
|
41
|
+
t.string :category
|
|
42
|
+
t.jsonb :hours_of_operation, default: {}
|
|
43
|
+
t.string :source_uid, null: false
|
|
44
|
+
t.jsonb :source_data, default: {}
|
|
45
|
+
t.jsonb :metadata, default: {}
|
|
46
|
+
t.timestamps null: false
|
|
47
|
+
|
|
48
|
+
t.index [:run_id, :source_uid], unique: true
|
|
49
|
+
t.index [:run_id, :status]
|
|
50
|
+
t.index :status
|
|
51
|
+
t.index :source_uid
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
create_table :prospector_classification_runs, if_not_exists: true do |t|
|
|
55
|
+
t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
|
|
56
|
+
t.integer :actor_id
|
|
57
|
+
t.string :ai_model, null: false
|
|
58
|
+
t.string :status, null: false, default: "pending"
|
|
59
|
+
t.boolean :include_approved, null: false, default: false
|
|
60
|
+
t.jsonb :results
|
|
61
|
+
t.text :error_message
|
|
62
|
+
t.datetime :started_at
|
|
63
|
+
t.datetime :completed_at
|
|
64
|
+
t.timestamps null: false
|
|
65
|
+
|
|
66
|
+
t.index [:run_id, :status]
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
create_table :prospector_keywords, if_not_exists: true do |t|
|
|
70
|
+
t.string :domain, null: false
|
|
71
|
+
t.string :category, null: false
|
|
72
|
+
t.string :keyword, null: false
|
|
73
|
+
t.string :source, null: false, default: "llm"
|
|
74
|
+
t.boolean :active, null: false, default: true
|
|
75
|
+
t.jsonb :metadata, default: {}
|
|
76
|
+
t.timestamps null: false
|
|
77
|
+
|
|
78
|
+
t.index [:domain, :category, :keyword], unique: true
|
|
79
|
+
t.index [:domain, :active]
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module Prospector
  module Generators
    # Install generator: copies the initializer, adds the table migration,
    # mounts the engine in the host routes and prints follow-up instructions.
    # The public methods below are the generator's steps.
    class InstallGenerator < Rails::Generators::Base
      MIGRATION_NAME = "create_prospector_tables".freeze

      source_root File.expand_path("templates", __dir__)

      # Writes config/initializers/prospector.rb from the bundled template.
      def create_initializer
        template "prospector.rb", "config/initializers/prospector.rb"
      end

      # Writes the migration that creates the Prospector tables.
      #
      # The version prefix is a UTC timestamp, matching how Rails numbers
      # migrations; a local-time prefix can sort out of order relative to the
      # host app's other migrations. If a copy of the migration already
      # exists (the generator was run before) nothing is written, because a
      # second file would define the same migration class again.
      def create_migration
        existing = Dir.glob(File.join(destination_root, "db/migrate/*_#{MIGRATION_NAME}.rb"))
        if existing.any?
          say_status :skip, "db/migrate/*_#{MIGRATION_NAME}.rb already exists", :yellow
          return
        end

        version = Time.now.utc.strftime("%Y%m%d%H%M%S")
        template "#{MIGRATION_NAME}.rb", "db/migrate/#{version}_#{MIGRATION_NAME}.rb"
      end

      # Adds the engine mount point to the host application's routes.
      def mount_engine
        route 'mount Prospector::Engine => "/prospector"'
      end

      # Prints the remaining manual steps.
      def display_post_install
        say ""
        say "Prospector installed successfully!", :green
        say ""
        say "Next steps:"
        say "  1. Edit config/initializers/prospector.rb"
        say "  2. Run: bin/rails db:migrate"
        say "  3. Visit /prospector in your browser"
        say ""
      end
    end
  end
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
class CreateProspectorTables < ActiveRecord::Migration[7.1]
|
|
2
|
+
def change
|
|
3
|
+
create_table :prospector_runs do |t|
|
|
4
|
+
t.string :status, null: false, default: "pending"
|
|
5
|
+
t.string :source_adapter, null: false
|
|
6
|
+
t.string :geography_type, null: false
|
|
7
|
+
t.jsonb :geography_data, null: false, default: {}
|
|
8
|
+
t.string :categories, array: true, default: []
|
|
9
|
+
t.string :label
|
|
10
|
+
t.integer :actor_id
|
|
11
|
+
t.integer :total_found, default: 0
|
|
12
|
+
t.integer :fetched_count, default: 0
|
|
13
|
+
t.integer :skipped_count, default: 0
|
|
14
|
+
t.integer :error_count, default: 0
|
|
15
|
+
t.text :error_messages
|
|
16
|
+
t.jsonb :metadata, default: {}
|
|
17
|
+
t.datetime :started_at
|
|
18
|
+
t.datetime :completed_at
|
|
19
|
+
t.timestamps null: false
|
|
20
|
+
|
|
21
|
+
t.index :status
|
|
22
|
+
t.index :source_adapter
|
|
23
|
+
t.index :created_at
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
create_table :prospector_candidates do |t|
|
|
27
|
+
t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
|
|
28
|
+
t.string :status, null: false, default: "pending"
|
|
29
|
+
t.string :name, null: false
|
|
30
|
+
t.string :address, null: false
|
|
31
|
+
t.decimal :latitude, precision: 10, scale: 6
|
|
32
|
+
t.decimal :longitude, precision: 10, scale: 6
|
|
33
|
+
t.string :phone_number
|
|
34
|
+
t.string :website
|
|
35
|
+
t.string :email
|
|
36
|
+
t.string :facebook_url
|
|
37
|
+
t.string :instagram_url
|
|
38
|
+
t.string :linkedin_url
|
|
39
|
+
t.string :tiktok_url
|
|
40
|
+
t.string :youtube_url
|
|
41
|
+
t.text :description
|
|
42
|
+
t.string :category
|
|
43
|
+
t.jsonb :hours_of_operation, default: {}
|
|
44
|
+
t.string :source_uid, null: false
|
|
45
|
+
t.jsonb :source_data, default: {}
|
|
46
|
+
t.jsonb :metadata, default: {}
|
|
47
|
+
t.timestamps null: false
|
|
48
|
+
|
|
49
|
+
t.index [:run_id, :source_uid], unique: true
|
|
50
|
+
t.index [:run_id, :status]
|
|
51
|
+
t.index :status
|
|
52
|
+
t.index :source_uid
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
create_table :prospector_classification_runs do |t|
|
|
56
|
+
t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
|
|
57
|
+
t.integer :actor_id
|
|
58
|
+
t.string :ai_model, null: false
|
|
59
|
+
t.string :status, null: false, default: "pending"
|
|
60
|
+
t.boolean :include_approved, null: false, default: false
|
|
61
|
+
t.jsonb :results
|
|
62
|
+
t.text :error_message
|
|
63
|
+
t.datetime :started_at
|
|
64
|
+
t.datetime :completed_at
|
|
65
|
+
t.timestamps null: false
|
|
66
|
+
|
|
67
|
+
t.index [:run_id, :status]
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
create_table :prospector_keywords do |t|
|
|
71
|
+
t.string :domain, null: false
|
|
72
|
+
t.string :category, null: false
|
|
73
|
+
t.string :keyword, null: false
|
|
74
|
+
t.string :source, null: false, default: "llm"
|
|
75
|
+
t.boolean :active, null: false, default: true
|
|
76
|
+
t.jsonb :metadata, default: {}
|
|
77
|
+
t.timestamps null: false
|
|
78
|
+
|
|
79
|
+
t.index [:domain, :category, :keyword], unique: true
|
|
80
|
+
t.index [:domain, :active]
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Prospector.configure do |config|
|
|
2
|
+
# Required: domain slug used for keyword generation and classification
|
|
3
|
+
config.domain = "your_domain"
|
|
4
|
+
|
|
5
|
+
# Required: block called when a candidate is approved
|
|
6
|
+
# The candidate is a Prospector::Candidate instance
|
|
7
|
+
config.on_approve do |candidate|
|
|
8
|
+
# Example:
|
|
9
|
+
# YourModel.create_from_prospector!(candidate)
|
|
10
|
+
raise "Configure Prospector.config.on_approve in config/initializers/prospector.rb"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# Required: admin authentication
|
|
14
|
+
config.authenticate_admin_with do |controller|
|
|
15
|
+
# Example:
|
|
16
|
+
# controller.current_user&.admin?
|
|
17
|
+
raise "Configure Prospector.config.authenticate_admin_with in config/initializers/prospector.rb"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Required: classifier class (must inherit LlmClassifier::Classifier)
|
|
21
|
+
# Generate one with: rails g llm_classifier:classifier YourDomain category1 category2
|
|
22
|
+
# config.classifier = YourDomainClassifier
|
|
23
|
+
|
|
24
|
+
# Optional: default source adapter (default: :google_places)
|
|
25
|
+
# config.default_source = :google_places
|
|
26
|
+
|
|
27
|
+
# Optional: default AI model for classification (passed to classifier)
|
|
28
|
+
# config.default_classifier_model = "anthropic:claude-sonnet-4-20250514"
|
|
29
|
+
|
|
30
|
+
# Optional: duplicate check hook
|
|
31
|
+
# config.duplicate_check do |source_uid:, name:, **|
|
|
32
|
+
# YourModel.exists?(["import_metadata->>'place_id' = ?", source_uid])
|
|
33
|
+
# end
|
|
34
|
+
|
|
35
|
+
# Optional: job queue name (default: :default)
|
|
36
|
+
# config.queue_name = :prospector
|
|
37
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Library Code
|
|
2
|
+
|
|
3
|
+
## Sources (`sources/`)
|
|
4
|
+
|
|
5
|
+
Pluggable adapter system for fetching business data from external APIs.
|
|
6
|
+
|
|
7
|
+
### Interface
|
|
8
|
+
|
|
9
|
+
All adapters extend `Sources::Base` and implement `fetch(geography:, keywords:)` returning `Array<Sources::Result>`.
|
|
10
|
+
|
|
11
|
+
### Google Places Adapter (`sources/google_places/`)
|
|
12
|
+
|
|
13
|
+
- `Adapter` - Main adapter class. Deduplicates by UID across keyword iterations. Routes to text search or nearby search based on geography type.
|
|
14
|
+
- `Client` - HTTParty wrapper for Google Places API (New). Endpoints: searchText, searchNearby.
|
|
15
|
+
- `UsAddressValidator` - Filters non-US addresses via state code regex.
|
|
16
|
+
|
|
17
|
+
### Adding a New Source
|
|
18
|
+
|
|
19
|
+
1. Create `sources/your_source/adapter.rb` extending `Sources::Base`
|
|
20
|
+
2. Implement `fetch(geography:, keywords:)` returning `Array<Sources::Result>`
|
|
21
|
+
3. Register in host app: `config.register_source(:your_source, YourSource::Adapter)`
|
|
22
|
+
|
|
23
|
+
## Geography (`geography/`)
|
|
24
|
+
|
|
25
|
+
Value objects representing search areas. All extend `Geography::Base`.
|
|
26
|
+
|
|
27
|
+
- `MetroArea` - name + primary_state. Text search only.
|
|
28
|
+
- `City` - city + state. Text search only.
|
|
29
|
+
- `Coordinates` - lat/lng + radius_meters. Nearby search only.
|
|
30
|
+
- `ZipCode` - zip code string. Text search only.
|
|
31
|
+
- `BoundingBox` - NE/SW corners. Converted to center+radius for nearby search.
|
|
32
|
+
|
|
33
|
+
Key methods: `to_query_string` (for text search), `to_coordinate_restriction` (for nearby search), `to_h`/`from_h` (serialization).
|
|
34
|
+
|
|
35
|
+
## Pipeline (`pipeline/`)
|
|
36
|
+
|
|
37
|
+
- `Orchestrator` - Coordinates the full fetch flow. Called by `FetchJob`. Collects keywords, calls adapter, normalizes results, deduplicates, creates candidates.
|
|
38
|
+
- `Normalizer` - Parses formatted address strings into structured components (street, city, state, zip).
|
|
39
|
+
|
|
40
|
+
## Classification (`classification/`)
|
|
41
|
+
|
|
42
|
+
- `Runner` - Iterates unprocessed candidates, calls LLM via ruby_llm, stores results in candidate metadata. Auto-rejects candidates with empty categories.
|
|
43
|
+
|
|
44
|
+
## Keywords (`keywords/`)
|
|
45
|
+
|
|
46
|
+
- `Generator` - Checks DB for existing keywords first. On cache miss, calls LLM to generate keywords and persists them.
|
|
47
|
+
|
|
48
|
+
## Other Files
|
|
49
|
+
|
|
50
|
+
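- `configuration.rb` - `Prospector.configure` DSL. Required keys: domain, on_approve, authenticate_admin_with. Note: `validate!` currently enforces only domain and authenticate_admin_with, so a missing on_approve is not detected at boot.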
|
|
51
|
+
- `engine.rb` - Rails::Engine with isolate_namespace.
|
|
52
|
+
- `error.rb` - Error hierarchy: Error, ConfigurationError, AdapterError, MissingApiKeyError, ClassificationError.
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
module Prospector
  module Classification
    # Runs the configured classifier over a run's candidates, stores each
    # result in the candidate's metadata, rejects candidates that come back
    # with no categories, and finally records the outcome on the run (and on
    # the optional classification-run record).
    class Runner
      # @param run the run record; must respond to +candidates+, +reload+,
      #   +cancelled?+ and +update!+
      # @param model [String, nil] model identifier handed to the classifier;
      #   defaults to +Prospector.config.default_classifier_model+
      # @param include_approved [Boolean] also classify approved candidates
      # @param classification_run optional record that receives the final
      #   status and the processed/rejected/errors counters
      def initialize(run, model: nil, include_approved: false, classification_run: nil)
        @run = run
        @model = model || Prospector.config.default_classifier_model
        @include_approved = include_approved
        @classification_run = classification_run
        @processed = 0
        @rejected = 0
        @errors = 0
      end

      # Classifies every eligible candidate, then finalizes the run.
      def perform
        classify_candidates
        finalize
      end

      private

      # Walks the eligible candidates; the run is reloaded before each one so
      # that a cancellation issued meanwhile stops the loop.
      def classify_candidates
        candidates_to_classify.find_each do |candidate|
          break if @run.reload.cancelled?

          classify_candidate(candidate)
        end
      end

      # Pending (optionally also approved) candidates in the +ai_unprocessed+ scope.
      def candidates_to_classify
        statuses = @include_approved ? %w[pending approved] : "pending"
        @run.candidates.where(status: statuses).ai_unprocessed
      end

      # Classifies one candidate. Any StandardError is recorded on the
      # candidate as "llm_error" and counted, so one failure does not stop
      # the remaining candidates.
      def classify_candidate(candidate)
        result = invoke_classifier(candidate)
        store_result(candidate, result)
        auto_reject_if_empty(candidate, result)
        @processed += 1
      rescue => e
        candidate.update!(metadata: metadata_for(candidate).merge("llm_error" => e.message))
        @errors += 1
      end

      # The metadata column is nullable, so fall back to an empty hash.
      def metadata_for(candidate)
        candidate.metadata || {}
      end

      def invoke_classifier(candidate)
        resolve_classifier.classify(build_input(candidate), model: @model)
      end

      # Resolves the configured classifier once per runner; a String is
      # treated as a class name. Nothing is memoized when the setting is
      # missing, so the error is raised again for every candidate.
      def resolve_classifier
        @classifier ||= begin
          raw = Prospector.config.classifier
          raise ConfigurationError, "Prospector.config.classifier must be set" unless raw

          raw.is_a?(String) ? raw.constantize : raw
        end
      end

      # Builds the classifier input hash from the candidate's attributes.
      def build_input(candidate)
        {
          name: candidate.name,
          address: candidate.address,
          website: candidate.website,
          description: candidate.description,
          # source_data is nullable; "types" is joined when present.
          source_types: candidate.source_data&.dig("types")&.join(", ")
        }
      end

      # Persists the classification outcome. An "llm_error" left behind by an
      # earlier failed attempt is dropped so that a successfully classified
      # candidate no longer carries a stale error.
      def store_result(candidate, result)
        previous = metadata_for(candidate).reject { |key, _| key == "llm_error" }
        candidate.update!(metadata: previous.merge(
          "llm_categories" => result.categories,
          "llm_confidence" => result.confidence,
          "llm_reasoning" => result.reasoning,
          "llm_classified_at" => Time.current.iso8601,
          "llm_model" => result.model,
          "classification_run_id" => @classification_run&.id
        ))
      end

      # Rejects the candidate when the classifier returned no categories.
      def auto_reject_if_empty(candidate, result)
        return if result.categories.any?

        candidate.reject!(reason: "no_relevant_categories")
        @rejected += 1
      end

      # Marks the run "failed" only when nothing was processed and at least
      # one error occurred, otherwise "completed". A cancelled run is left
      # untouched; only the classification run is marked "cancelled".
      def finalize
        return finalize_classification_run("cancelled") if @run.reload.cancelled?

        run_status = @processed.zero? && @errors.positive? ? "failed" : "completed"
        @run.update!(status: run_status, completed_at: Time.current)
        finalize_classification_run(run_status)
      end

      # Writes the final status and counters to the classification run, if any.
      def finalize_classification_run(run_status)
        return unless @classification_run

        @classification_run.update!(
          status: run_status,
          completed_at: Time.current,
          results: { "processed" => @processed, "rejected" => @rejected, "errors" => @errors }
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
module Prospector
  # Settings object for the engine: plain attributes, three optional
  # callback blocks, and a registry that maps source keys to adapter classes.
  class Configuration
    attr_accessor :domain, :default_source, :default_classifier_model,
                  :classifier, :turbo_enabled, :queue_name

    attr_reader :sources

    def initialize
      @sources = { google_places: "Prospector::Sources::GooglePlaces::Adapter" }
      @default_source = :google_places
      @default_classifier_model = "anthropic:claude-sonnet-4-20250514"
      @queue_name = :default
      @turbo_enabled = defined?(Turbo::Broadcastable)
      @domain = nil
      @classifier = nil
      @on_approve_block = nil
      @authenticate_admin_block = nil
      @duplicate_check_block = nil
    end

    # With a block: stores it as the approval callback. Always returns the
    # currently stored callback (nil when none was set).
    def on_approve(&block)
      @on_approve_block = block || @on_approve_block
    end

    # With a block: stores it as the admin authentication callback. Always
    # returns the currently stored callback.
    def authenticate_admin_with(&block)
      @authenticate_admin_block = block || @authenticate_admin_block
    end

    # With a block: stores it as the duplicate-check callback. Always returns
    # the currently stored callback.
    def duplicate_check(&block)
      @duplicate_check_block = block || @duplicate_check_block
    end

    # Registers an adapter under +key+; the adapter is stored by name
    # (+to_s+) and turned back into a constant by #resolve_source.
    def register_source(key, adapter_class)
      @sources[key.to_sym] = adapter_class.to_s
    end

    # Returns the adapter registered under +key+, constantizing it when it
    # was stored as a String.
    #
    # @raise [AdapterNotFoundError] when nothing is registered for +key+
    def resolve_source(key)
      registered = @sources[key.to_sym] ||
                   raise(AdapterNotFoundError, "No source adapter registered for :#{key}")

      case registered
      when String then registered.constantize
      else registered
      end
    end

    # Checks the settings enforced at boot: +domain+ and the admin
    # authentication callback.
    #
    # @raise [ConfigurationError] when either is missing
    def validate!
      if domain.blank?
        raise ConfigurationError, "Prospector.config.domain must be set"
      end
      return if @authenticate_admin_block

      raise ConfigurationError, "Prospector.config.authenticate_admin_with must be set"
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
module Prospector
  # Rails engine for Prospector, isolated under the Prospector namespace.
  class Engine < ::Rails::Engine
    isolate_namespace Prospector

    # The parent of this file's directory goes on both the autoload and the
    # eager-load paths.
    # NOTE(review): going by the package layout that directory is lib/, which
    # also holds lib/generators and lib/prospector/version.rb - confirm every
    # file under it follows the autoloader's naming rules when eager loading.
    [config.autoload_paths, config.eager_load_paths].each do |load_paths|
      load_paths << File.expand_path("..", __dir__)
    end

    # Configuration is validated at boot in production only.
    initializer "prospector.configuration" do
      next unless Rails.env.production?

      Prospector.config.validate!
    end

    # Expose the engine's stylesheets when the host app has an asset pipeline.
    initializer "prospector.assets" do |app|
      next unless app.config.respond_to?(:assets)

      app.config.assets.paths << root.join("app/assets/stylesheets")
    end
  end
end
|