prospector_engine 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +333 -0
  4. data/Rakefile +9 -0
  5. data/app/CLAUDE.md +43 -0
  6. data/app/assets/stylesheets/prospector/application.css +476 -0
  7. data/app/controllers/prospector/application_controller.rb +16 -0
  8. data/app/controllers/prospector/candidates_controller.rb +31 -0
  9. data/app/controllers/prospector/keyword_generations_controller.rb +10 -0
  10. data/app/controllers/prospector/keywords_controller.rb +38 -0
  11. data/app/controllers/prospector/run_bulk_approvals_controller.rb +13 -0
  12. data/app/controllers/prospector/run_cancellations_controller.rb +9 -0
  13. data/app/controllers/prospector/run_reclassifications_controller.rb +21 -0
  14. data/app/controllers/prospector/run_restarts_controller.rb +14 -0
  15. data/app/controllers/prospector/run_retries_controller.rb +14 -0
  16. data/app/controllers/prospector/runs_controller.rb +47 -0
  17. data/app/jobs/prospector/application_job.rb +5 -0
  18. data/app/jobs/prospector/bulk_approve_job.rb +14 -0
  19. data/app/jobs/prospector/classify_job.rb +17 -0
  20. data/app/jobs/prospector/fetch_job.rb +8 -0
  21. data/app/models/prospector/application_record.rb +6 -0
  22. data/app/models/prospector/candidate.rb +93 -0
  23. data/app/models/prospector/classification_run.rb +15 -0
  24. data/app/models/prospector/keyword.rb +16 -0
  25. data/app/models/prospector/run.rb +94 -0
  26. data/app/views/prospector/candidates/show.html.erb +63 -0
  27. data/app/views/prospector/keywords/index.html.erb +72 -0
  28. data/app/views/prospector/layouts/prospector.html.erb +38 -0
  29. data/app/views/prospector/runs/index.html.erb +33 -0
  30. data/app/views/prospector/runs/new.html.erb +109 -0
  31. data/app/views/prospector/runs/show.html.erb +111 -0
  32. data/config/routes.rb +15 -0
  33. data/db/prospector_schema.rb +81 -0
  34. data/lib/generators/prospector/install/install_generator.rb +31 -0
  35. data/lib/generators/prospector/install/templates/create_prospector_tables.rb +83 -0
  36. data/lib/generators/prospector/install/templates/prospector.rb +37 -0
  37. data/lib/prospector/CLAUDE.md +52 -0
  38. data/lib/prospector/classification/runner.rb +105 -0
  39. data/lib/prospector/configuration.rb +56 -0
  40. data/lib/prospector/engine.rb +18 -0
  41. data/lib/prospector/enrichment/contact_scraper.rb +188 -0
  42. data/lib/prospector/error.rb +8 -0
  43. data/lib/prospector/geography/base.rb +40 -0
  44. data/lib/prospector/geography/bounding_box.rb +58 -0
  45. data/lib/prospector/geography/city.rb +29 -0
  46. data/lib/prospector/geography/coordinates.rb +43 -0
  47. data/lib/prospector/geography/metro_area.rb +74 -0
  48. data/lib/prospector/geography/zip_code.rb +25 -0
  49. data/lib/prospector/keywords/generator.rb +74 -0
  50. data/lib/prospector/pipeline/normalizer.rb +57 -0
  51. data/lib/prospector/pipeline/orchestrator.rb +151 -0
  52. data/lib/prospector/sources/base.rb +13 -0
  53. data/lib/prospector/sources/google_places/adapter.rb +92 -0
  54. data/lib/prospector/sources/google_places/client.rb +58 -0
  55. data/lib/prospector/sources/google_places/us_address_validator.rb +24 -0
  56. data/lib/prospector/sources/result.rb +21 -0
  57. data/lib/prospector/version.rb +3 -0
  58. data/lib/prospector.rb +20 -0
  59. metadata +185 -0
@@ -0,0 +1,111 @@
1
<%# Run detail page: header actions, error panel, counters, bulk actions, status tabs and the candidate table. %>
<%# Expects @run, @status_counts (status => count), @filter (active tab) and @candidates from the controller. %>
<%# Subscribe to live updates for this run only when Turbo's streams helper is loaded. %>
<% if defined?(Turbo::StreamsFrom) %>
  <%= turbo_stream_from @run %>
<% end %>

<div class="prospector-flex prospector-flex-between">
  <%# `presence` makes a blank label fall back to the generated title, not only a nil one. %>
  <h1 class="prospector-heading"><%= @run.label.presence || "Run ##{@run.id}" %></h1>
  <div class="prospector-flex prospector-gap-2">
    <% if @run.cancellable? %>
      <%= button_to "Cancel", run_cancellation_path(@run), method: :post, class: "prospector-btn prospector-btn-danger prospector-btn-sm" %>
    <% end %>
    <% if @run.retryable? %>
      <%= button_to "Retry", run_retry_path(@run), method: :post, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
    <% end %>
    <% if @run.restartable? %>
      <%= button_to "Restart", run_restart_path(@run), method: :post, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
    <% end %>
  </div>
</div>

<%# Error panel, shown only when the run recorded error messages. %>
<% if @run.error_messages.present? %>
  <div class="prospector-card" style="border-left: 3px solid var(--p-danger); background: var(--p-danger-bg);">
    <div class="prospector-subheading" style="color: var(--p-danger);">Error</div>
    <pre style="font-family: var(--p-mono, monospace); font-size: 0.75rem; color: var(--p-text-secondary, #a1a1a8); white-space: pre-wrap; margin: 0;"><%= @run.error_messages %></pre>
  </div>
<% end %>

<%# Run counters and current status. %>
<div class="prospector-stats">
  <div class="prospector-stat">
    <div class="prospector-stat-value"><%= @run.total_found %></div>
    <div class="prospector-stat-label">Found</div>
  </div>
  <div class="prospector-stat">
    <div class="prospector-stat-value"><%= @run.fetched_count %></div>
    <div class="prospector-stat-label">Fetched</div>
  </div>
  <div class="prospector-stat">
    <div class="prospector-stat-value"><%= @run.skipped_count %></div>
    <div class="prospector-stat-label">Skipped</div>
  </div>
  <div class="prospector-stat">
    <div class="prospector-stat-value"><span class="prospector-badge prospector-badge-<%= @run.status %>"><%= @run.status.titleize %></span></div>
    <div class="prospector-stat-label">Status</div>
  </div>
</div>

<%# Bulk actions are offered only for completed or classifying runs. %>
<% if @run.completed? || @run.classifying? %>
  <div class="prospector-flex prospector-gap-2 prospector-mt-4" style="margin-bottom: 1rem;">
    <%= button_to "Bulk Approve All", run_bulk_approval_path(@run), method: :post, class: "prospector-btn prospector-btn-primary prospector-btn-sm" %>
    <%= button_to "Reclassify", run_reclassification_path(@run), method: :post, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
  </div>
<% end %>

<%# One tab per candidate status; a status missing from @status_counts shows as 0. %>
<div class="prospector-tabs">
  <% %w[pending approved rejected].each do |filter| %>
    <% count = @status_counts[filter] || 0 %>
    <%= link_to run_path(@run, filter: filter),
          class: "prospector-tab #{@filter == filter ? 'prospector-tab-active' : ''}" do %>
      <%= filter.titleize %> (<%= count %>)
    <% end %>
  <% end %>
</div>

<div class="prospector-card">
  <table class="prospector-table">
    <thead>
      <tr>
        <th>Name</th>
        <th>Address</th>
        <th>Contact</th>
        <th>Status</th>
        <th>Actions</th>
      </tr>
    </thead>
    <tbody>
      <% @candidates.each do |candidate| %>
        <tr>
          <td>
            <strong><%= candidate.name %></strong>
            <% if candidate.website.present? %>
              <%# rel="noopener noreferrer" keeps the externally sourced page from reaching window.opener. %>
              <%# NOTE(review): the URL scheme is not checked here; confirm websites are validated as http(s) before storage. %>
              <br><a href="<%= candidate.website %>" target="_blank" rel="noopener noreferrer" class="prospector-text-muted prospector-text-sm"><%= truncate(candidate.website, length: 40) %></a>
            <% end %>
          </td>
          <td class="prospector-text-sm"><%= candidate.address %></td>
          <td class="prospector-text-sm">
            <% if candidate.email.present? %>
              <%= mail_to candidate.email, candidate.email, class: "prospector-text-sm" %><br>
            <% end %>
            <% if candidate.has_contact_info? %>
              <span class="prospector-text-muted"><%= Prospector::Candidate::SOCIAL_FIELDS.count { |f| candidate[f].present? } %> social</span>
            <% end %>
          </td>
          <td><span class="prospector-badge prospector-badge-<%= candidate.status %>"><%= candidate.status.titleize %></span></td>
          <td>
            <div class="prospector-flex prospector-gap-2">
              <%# return_filter carries the active tab along with the status change request. %>
              <% if candidate.pending? && candidate.approvable? %>
                <%= button_to "Approve", run_candidate_path(@run, candidate, status: "approved", return_filter: @filter), method: :patch, class: "prospector-btn prospector-btn-primary prospector-btn-sm" %>
              <% end %>
              <% if candidate.pending? %>
                <%= button_to "Reject", run_candidate_path(@run, candidate, status: "rejected", return_filter: @filter), method: :patch, class: "prospector-btn prospector-btn-danger prospector-btn-sm" %>
              <% end %>
              <% if candidate.rejected? %>
                <%= button_to "Restore", run_candidate_path(@run, candidate, status: "pending", return_filter: @filter), method: :patch, class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
              <% end %>
              <%= link_to "Details", run_candidate_path(@run, candidate), class: "prospector-btn prospector-btn-outline prospector-btn-sm" %>
            </div>
          </td>
        </tr>
      <% end %>
    </tbody>
  </table>
</div>
data/config/routes.rb ADDED
@@ -0,0 +1,15 @@
1
# Routes for the Prospector engine, relative to wherever the host app mounts it.
Prospector::Engine.routes.draw do
  # Singular, create-only actions nested under a run, mapped to their controllers.
  run_actions = {
    retry: "run_retries",
    restart: "run_restarts",
    cancellation: "run_cancellations",
    reclassification: "run_reclassifications",
    bulk_approval: "run_bulk_approvals"
  }

  resources :runs, only: %i[index show new create] do
    run_actions.each do |action, controller_name|
      resource action, only: %i[create], controller: controller_name
    end

    resources :candidates, only: %i[show update]
  end

  resources :keywords, only: %i[index create update destroy]
  resources :keyword_generations, only: %i[create]

  # The engine's landing page is the list of runs.
  root to: "runs#index"
end
@@ -0,0 +1,81 @@
1
# Schema for the four Prospector tables. Every table is created with
# `if_not_exists: true`, so a table that already exists is left alone.
ActiveRecord::Schema.define do
  # One row per fetch run.
  create_table :prospector_runs, if_not_exists: true do |t|
    t.string :status, null: false, default: "pending"
    t.string :source_adapter, :geography_type, null: false
    t.jsonb :geography_data, null: false, default: {}
    t.string :categories, array: true, default: []
    t.string :label
    t.integer :actor_id
    t.integer :total_found, :fetched_count, :skipped_count, :error_count, default: 0
    t.text :error_messages
    t.jsonb :metadata, default: {}
    t.datetime :started_at, :completed_at
    t.timestamps null: false

    t.index :status
    t.index :source_adapter
    t.index :created_at
  end

  # Candidates belong to a run; source_uid is unique within a run.
  create_table :prospector_candidates, if_not_exists: true do |t|
    t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
    t.string :status, null: false, default: "pending"
    t.string :name, :address, null: false
    t.decimal :latitude, :longitude, precision: 10, scale: 6
    t.string :phone_number, :website, :email,
             :facebook_url, :instagram_url, :linkedin_url, :tiktok_url, :youtube_url
    t.text :description
    t.string :category
    t.jsonb :hours_of_operation, default: {}
    t.string :source_uid, null: false
    t.jsonb :source_data, :metadata, default: {}
    t.timestamps null: false

    t.index %i[run_id source_uid], unique: true
    t.index %i[run_id status]
    t.index :status
    t.index :source_uid
  end

  # One row per classification pass over a run.
  create_table :prospector_classification_runs, if_not_exists: true do |t|
    t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
    t.integer :actor_id
    t.string :ai_model, null: false
    t.string :status, null: false, default: "pending"
    t.boolean :include_approved, null: false, default: false
    t.jsonb :results
    t.text :error_message
    t.datetime :started_at, :completed_at
    t.timestamps null: false

    t.index %i[run_id status]
  end

  # Keywords, unique per (domain, category, keyword).
  create_table :prospector_keywords, if_not_exists: true do |t|
    t.string :domain, :category, :keyword, null: false
    t.string :source, null: false, default: "llm"
    t.boolean :active, null: false, default: true
    t.jsonb :metadata, default: {}
    t.timestamps null: false

    t.index %i[domain category keyword], unique: true
    t.index %i[domain active]
  end
end
@@ -0,0 +1,31 @@
1
module Prospector
  module Generators
    # Install generator for the host application. Its public methods run in
    # definition order: write the initializer, write the migration, add the
    # mount route, then print the next steps.
    class InstallGenerator < Rails::Generators::Base
      source_root File.expand_path("templates", __dir__)

      # Copies the configuration template to config/initializers/prospector.rb.
      def create_initializer
        template "prospector.rb", "config/initializers/prospector.rb"
      end

      # Writes the table-creation migration into db/migrate.
      #
      # The version prefix is taken in UTC, matching how Rails timestamps its
      # own migrations; a local-time prefix can sort out of order relative to
      # migrations generated by Rails on machines that are not on UTC.
      def create_migration
        version = Time.now.utc.strftime("%Y%m%d%H%M%S")
        template "create_prospector_tables.rb",
                 "db/migrate/#{version}_create_prospector_tables.rb"
      end

      # Adds the engine mount to the host app's routes.
      def mount_engine
        route 'mount Prospector::Engine => "/prospector"'
      end

      # Prints the post-install checklist.
      def display_post_install
        say ""
        say "Prospector installed successfully!", :green
        say ""
        say "Next steps:"
        say "  1. Edit config/initializers/prospector.rb"
        say "  2. Run: bin/rails db:migrate"
        say "  3. Visit /prospector in your browser"
        say ""
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
# Creates the four Prospector tables: runs, candidates, classification runs
# and keywords.
class CreateProspectorTables < ActiveRecord::Migration[7.1]
  def change
    # One row per fetch run.
    create_table :prospector_runs do |t|
      t.string :status, null: false, default: "pending"
      t.string :source_adapter, :geography_type, null: false
      t.jsonb :geography_data, null: false, default: {}
      t.string :categories, array: true, default: []
      t.string :label
      t.integer :actor_id
      t.integer :total_found, :fetched_count, :skipped_count, :error_count, default: 0
      t.text :error_messages
      t.jsonb :metadata, default: {}
      t.datetime :started_at, :completed_at
      t.timestamps null: false

      t.index :status
      t.index :source_adapter
      t.index :created_at
    end

    # Candidates belong to a run; source_uid is unique within a run.
    create_table :prospector_candidates do |t|
      t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
      t.string :status, null: false, default: "pending"
      t.string :name, :address, null: false
      t.decimal :latitude, :longitude, precision: 10, scale: 6
      t.string :phone_number, :website, :email,
               :facebook_url, :instagram_url, :linkedin_url, :tiktok_url, :youtube_url
      t.text :description
      t.string :category
      t.jsonb :hours_of_operation, default: {}
      t.string :source_uid, null: false
      t.jsonb :source_data, :metadata, default: {}
      t.timestamps null: false

      t.index %i[run_id source_uid], unique: true
      t.index %i[run_id status]
      t.index :status
      t.index :source_uid
    end

    # One row per classification pass over a run.
    create_table :prospector_classification_runs do |t|
      t.references :run, null: false, foreign_key: { to_table: :prospector_runs }
      t.integer :actor_id
      t.string :ai_model, null: false
      t.string :status, null: false, default: "pending"
      t.boolean :include_approved, null: false, default: false
      t.jsonb :results
      t.text :error_message
      t.datetime :started_at, :completed_at
      t.timestamps null: false

      t.index %i[run_id status]
    end

    # Keywords, unique per (domain, category, keyword).
    create_table :prospector_keywords do |t|
      t.string :domain, :category, :keyword, null: false
      t.string :source, null: false, default: "llm"
      t.boolean :active, null: false, default: true
      t.jsonb :metadata, default: {}
      t.timestamps null: false

      t.index %i[domain category keyword], unique: true
      t.index %i[domain active]
    end
  end
end
@@ -0,0 +1,37 @@
1
# Prospector engine settings. The two blocks below raise until they are
# replaced with real implementations.
Prospector.configure do |config|
  # Required: domain slug, used for keyword generation and classification.
  config.domain = "your_domain"

  # Required: called with the approved Prospector::Candidate instance.
  # Example:
  #   config.on_approve { |candidate| YourModel.create_from_prospector!(candidate) }
  config.on_approve { |candidate| raise "Configure Prospector.config.on_approve in config/initializers/prospector.rb" }

  # Required: admin authentication; the block receives the controller.
  # Example:
  #   config.authenticate_admin_with { |controller| controller.current_user&.admin? }
  config.authenticate_admin_with { |controller| raise "Configure Prospector.config.authenticate_admin_with in config/initializers/prospector.rb" }

  # Required: classifier class (must inherit LlmClassifier::Classifier).
  # Generate one with: rails g llm_classifier:classifier YourDomain category1 category2
  # config.classifier = YourDomainClassifier

  # Optional: default source adapter (default: :google_places).
  # config.default_source = :google_places

  # Optional: default AI model for classification (passed to the classifier).
  # config.default_classifier_model = "anthropic:claude-sonnet-4-20250514"

  # Optional: duplicate check hook.
  # config.duplicate_check do |source_uid:, name:, **|
  #   YourModel.exists?(["import_metadata->>'place_id' = ?", source_uid])
  # end

  # Optional: job queue name (default: :default).
  # config.queue_name = :prospector
end
@@ -0,0 +1,52 @@
1
+ # Library Code
2
+
3
+ ## Sources (`sources/`)
4
+
5
+ Pluggable adapter system for fetching business data from external APIs.
6
+
7
+ ### Interface
8
+
9
+ All adapters extend `Sources::Base` and implement `fetch(geography:, keywords:)` returning `Array<Sources::Result>`.
10
+
11
+ ### Google Places Adapter (`sources/google_places/`)
12
+
13
+ - `Adapter` - Main adapter class. Deduplicates by UID across keyword iterations. Routes to text search or nearby search based on geography type.
14
+ - `Client` - HTTParty wrapper for Google Places API (New). Endpoints: searchText, searchNearby.
15
+ - `UsAddressValidator` - Filters non-US addresses via state code regex.
16
+
17
+ ### Adding a New Source
18
+
19
+ 1. Create `sources/your_source/adapter.rb` extending `Sources::Base`
20
+ 2. Implement `fetch(geography:, keywords:)` returning `Array<Sources::Result>`
21
+ 3. Register in host app: `config.register_source(:your_source, YourSource::Adapter)`
22
+
23
+ ## Geography (`geography/`)
24
+
25
+ Value objects representing search areas. All extend `Geography::Base`.
26
+
27
+ - `MetroArea` - name + primary_state. Text search only.
28
+ - `City` - city + state. Text search only.
29
+ - `Coordinates` - lat/lng + radius_meters. Nearby search only.
30
+ - `ZipCode` - zip code string. Text search only.
31
+ - `BoundingBox` - NE/SW corners. Converted to center+radius for nearby search.
32
+
33
+ Key methods: `to_query_string` (for text search), `to_coordinate_restriction` (for nearby search), `to_h`/`from_h` (serialization).
34
+
35
+ ## Pipeline (`pipeline/`)
36
+
37
+ - `Orchestrator` - Coordinates the full fetch flow. Called by `FetchJob`. Collects keywords, calls adapter, normalizes results, deduplicates, creates candidates.
38
+ - `Normalizer` - Parses formatted address strings into structured components (street, city, state, zip).
39
+
40
+ ## Classification (`classification/`)
41
+
42
+ - `Runner` - Iterates unprocessed candidates, calls LLM via ruby_llm, stores results in candidate metadata. Auto-rejects candidates with empty categories.
43
+
44
+ ## Keywords (`keywords/`)
45
+
46
+ - `Generator` - Checks DB for existing keywords first. On cache miss, calls LLM to generate keywords and persists them.
47
+
48
+ ## Other Files
49
+
50
+ - `configuration.rb` - `Prospector.configure` DSL. Required keys: domain, on_approve, authenticate_admin_with. Note: `validate!` currently enforces only domain and authenticate_admin_with.
51
+ - `engine.rb` - Rails::Engine with isolate_namespace.
52
+ - `error.rb` - Error hierarchy: Error, ConfigurationError, AdapterError, MissingApiKeyError, ClassificationError.
@@ -0,0 +1,105 @@
1
module Prospector
  module Classification
    # Classifies the candidates of a run with the configured classifier,
    # stores each result in the candidate's metadata, rejects candidates that
    # come back with no categories, and records the final status on the run
    # and on the optional classification-run record.
    class Runner
      # @param run the run whose candidates are classified; it is reloaded to
      #   check for cancellation and updated with the final status
      # @param model [String, nil] model passed to the classifier; defaults to
      #   Prospector.config.default_classifier_model
      # @param include_approved [Boolean] also classify approved candidates
      # @param classification_run optional record that receives the status and
      #   the processed/rejected/errors counters
      def initialize(run, model: nil, include_approved: false, classification_run: nil)
        @run = run
        @model = model || Prospector.config.default_classifier_model
        @include_approved = include_approved
        @classification_run = classification_run
        @processed = 0
        @rejected = 0
        @errors = 0
      end

      # Classifies every eligible candidate, then writes the final status.
      def perform
        classify_candidates
        finalize
      end

      private

      # Walks the eligible candidates, stopping as soon as the run is cancelled.
      def classify_candidates
        candidates_to_classify.find_each do |candidate|
          break if @run.reload.cancelled?

          classify_candidate(candidate)
        end
      end

      # Pending (and optionally approved) candidates in the ai_unprocessed scope.
      def candidates_to_classify
        statuses = @include_approved ? %w[pending approved] : "pending"
        @run.candidates.where(status: statuses).ai_unprocessed
      end

      # Classifies one candidate. Any failure is counted and recorded on the
      # candidate so that a single bad record cannot abort the whole run.
      def classify_candidate(candidate)
        result = invoke_classifier(candidate)
        store_result(candidate, result)
        auto_reject_if_empty(candidate, result)
        @processed += 1
      rescue StandardError => e
        # Count first: the counter must be right even if recording fails below.
        @errors += 1
        record_failure(candidate, e)
      end

      # Stores the error message under "llm_error" in the candidate metadata.
      # The write itself may fail (for example when the original error came
      # from saving this same record); that second failure is logged instead
      # of raised, so the loop continues and finalize still runs.
      def record_failure(candidate, error)
        metadata = (candidate.metadata || {}).merge("llm_error" => error.message)
        candidate.update!(metadata: metadata)
      rescue StandardError => e
        logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
        logger&.error("[Prospector] could not record classification error (#{error.message}): #{e.message}")
      end

      def invoke_classifier(candidate)
        # Resolved once per runner rather than once per candidate.
        @classifier ||= resolve_classifier
        @classifier.classify(build_input(candidate), model: @model)
      end

      # Returns the configured classifier; a String is constantized.
      #
      # @raise [ConfigurationError] when no classifier is configured
      def resolve_classifier
        raw = Prospector.config.classifier
        raise ConfigurationError, "Prospector.config.classifier must be set" unless raw

        raw.is_a?(String) ? raw.constantize : raw
      end

      # Input hash handed to the classifier.
      def build_input(candidate)
        {
          name: candidate.name,
          address: candidate.address,
          website: candidate.website,
          description: candidate.description,
          # source_data may be nil; treat that the same as "no types".
          source_types: candidate.source_data&.dig("types")&.join(", ")
        }
      end

      # Merges the classification outcome into the candidate metadata.
      def store_result(candidate, result)
        candidate.update!(metadata: (candidate.metadata || {}).merge(
          "llm_categories" => result.categories,
          "llm_confidence" => result.confidence,
          "llm_reasoning" => result.reasoning,
          "llm_classified_at" => Time.current.iso8601,
          "llm_model" => result.model,
          "classification_run_id" => @classification_run&.id
        ))
      end

      # Rejects a candidate for which the classifier returned no categories.
      def auto_reject_if_empty(candidate, result)
        return if result.categories.any?

        candidate.reject!(reason: "no_relevant_categories")
        @rejected += 1
      end

      # A cancelled run is left untouched (only the classification run is
      # closed). Otherwise the run is "failed" when nothing was processed and
      # at least one error occurred, and "completed" in every other case.
      def finalize
        return finalize_classification_run("cancelled") if @run.reload.cancelled?

        run_status = @processed.zero? && @errors.positive? ? "failed" : "completed"
        @run.update!(status: run_status, completed_at: Time.current)
        finalize_classification_run(run_status)
      end

      # Writes status, completion time and counters to the classification run,
      # when one was given.
      def finalize_classification_run(run_status)
        return unless @classification_run

        @classification_run.update!(
          status: run_status,
          completed_at: Time.current,
          results: { "processed" => @processed, "rejected" => @rejected, "errors" => @errors }
        )
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module Prospector
  # Settings object behind the Prospector configuration DSL: plain accessors,
  # three hook blocks (approve, admin authentication, duplicate check) and a
  # registry of source adapters stored by class name.
  class Configuration
    attr_accessor :domain, :default_source, :default_classifier_model,
                  :classifier, :turbo_enabled, :queue_name

    attr_reader :sources

    def initialize
      @domain = nil
      @classifier = nil
      @default_source = :google_places
      @default_classifier_model = "anthropic:claude-sonnet-4-20250514"
      @queue_name = :default
      # Evaluated once, when the configuration object is built.
      @turbo_enabled = defined?(Turbo::Broadcastable)
      @sources = { google_places: "Prospector::Sources::GooglePlaces::Adapter" }
      @on_approve_block = nil
      @authenticate_admin_block = nil
      @duplicate_check_block = nil
    end

    # With a block: stores it as the approve hook. Always returns the stored block.
    def on_approve(&block)
      if block
        @on_approve_block = block
      else
        @on_approve_block
      end
    end

    # With a block: stores it as the admin authentication hook. Always returns
    # the stored block.
    def authenticate_admin_with(&block)
      if block
        @authenticate_admin_block = block
      else
        @authenticate_admin_block
      end
    end

    # With a block: stores it as the duplicate-check hook. Always returns the
    # stored block.
    def duplicate_check(&block)
      if block
        @duplicate_check_block = block
      else
        @duplicate_check_block
      end
    end

    # Registers an adapter under +key+; the adapter is stored as its name.
    def register_source(key, adapter_class)
      adapter_name = adapter_class.to_s
      @sources[key.to_sym] = adapter_name
    end

    # Looks up the adapter registered under +key+ and returns it, turning a
    # stored String into its constant.
    #
    # NOTE(review): AdapterNotFoundError is not among the error classes listed
    # in the library notes (Error, ConfigurationError, AdapterError,
    # MissingApiKeyError, ClassificationError) - confirm it is defined.
    def resolve_source(key)
      registered = @sources[key.to_sym]
      return (registered.is_a?(String) ? registered.constantize : registered) if registered

      raise AdapterNotFoundError, "No source adapter registered for :#{key}"
    end

    # Raises ConfigurationError when the domain is blank or no admin
    # authentication block has been set.
    def validate!
      if domain.blank?
        raise ConfigurationError, "Prospector.config.domain must be set"
      end
      return if @authenticate_admin_block

      raise ConfigurationError, "Prospector.config.authenticate_admin_with must be set"
    end
  end
end
@@ -0,0 +1,18 @@
1
module Prospector
  # Rails engine for Prospector, isolated under the Prospector namespace.
  class Engine < ::Rails::Engine
    isolate_namespace Prospector

    # The gem's lib/ directory (the parent of this file's directory) is added
    # to both the autoload and the eager-load paths.
    # NOTE(review): this places everything under lib/ (generator templates
    # included) in the autoloader's reach - confirm eager loading tolerates
    # those files.
    lib_root = File.expand_path("..", __dir__)
    config.autoload_paths << lib_root
    config.eager_load_paths << lib_root

    # Validate the configuration in production, but only once the application
    # has finished initializing. Engine initializers run before the host app's
    # config/initializers files, so validating directly in this initializer
    # would inspect a configuration the host has not filled in yet.
    initializer "prospector.configuration" do |app|
      app.config.after_initialize do
        Prospector.config.validate! if Rails.env.production?
      end
    end

    # Expose the engine's stylesheets when the host app has an asset pipeline.
    initializer "prospector.assets" do |app|
      if app.config.respond_to?(:assets)
        app.config.assets.paths << root.join("app/assets/stylesheets")
      end
    end
  end
end