bandcamp-discover 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fc34d8854761f38e20c75aa23bb7b4be9ce47fe163720be9f6b0d39f4b9de2f
4
- data.tar.gz: 134e62a2644a9411bec1d7049ff741cfde2d70847eddab58efbbd75c7e01f564
3
+ metadata.gz: a65e9bf97cd9ca459ed51bfebb55d81262de863b6e291187d6b41d004ae2e863
4
+ data.tar.gz: b8bd036e56d9276480966cdabb0ef49eda334d325559cd41cde9cf54c3b7e0d8
5
5
  SHA512:
6
- metadata.gz: b198b2a4330308e1d9cb887132ec9d78f083e2de53279088a4677972109e97f97e19cfbc4692f56b2e38098c2d22f8d60eabc726b2d89954bf89d54f9edc726e
7
- data.tar.gz: 4feb458ce7370f4b7fae724a451723cc59ca778c1b420ae1712b491f06dcde89de6eef607c1eae832ceeb4a57390a1957431f9a19d3620e903dbac64aef5909f
6
+ metadata.gz: c2ef36b3ff546e648f9029da0e42cc4d7fbaced58112156788f30255bbf3ff595448030d827e608fb5ff596d79b3734394de734ca983a6397341a45c6015c1a5
7
+ data.tar.gz: 622f759d307cf57357b278c77f2310f1917281a459e6b15856c98811be1926a9344556cf08f4681b80cddc9632239192217d5c67d3184251da001386a25a1bb8
data/Gemfile.lock ADDED
@@ -0,0 +1,110 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bandcamp-discover (0.1.7)
5
+ async
6
+ base64
7
+ concurrent-ruby
8
+ gli (~> 2.21.5)
9
+ logger
10
+ ostruct
11
+ playwright-ruby-client
12
+ sqlite3
13
+
14
+ GEM
15
+ remote: https://rubygems.org/
16
+ specs:
17
+ ast (2.4.2)
18
+ async (2.15.3)
19
+ console (~> 1.26)
20
+ fiber-annotation
21
+ io-event (~> 1.6, >= 1.6.5)
22
+ base64 (0.2.0)
23
+ concurrent-ruby (1.3.5)
24
+ console (1.27.0)
25
+ fiber-annotation
26
+ fiber-local (~> 1.1)
27
+ json
28
+ fiber-annotation (0.2.0)
29
+ fiber-local (1.1.0)
30
+ fiber-storage
31
+ fiber-storage (1.0.0)
32
+ gli (2.21.5)
33
+ io-event (1.6.5)
34
+ json (2.7.2)
35
+ language_server-protocol (3.17.0.3)
36
+ lint_roller (1.1.0)
37
+ logger (1.7.0)
38
+ mime-types (3.7.0)
39
+ logger
40
+ mime-types-data (~> 3.2025, >= 3.2025.0507)
41
+ mime-types-data (3.2025.0729)
42
+ mini_portile2 (2.8.7)
43
+ minitest (5.24.1)
44
+ ostruct (0.6.1)
45
+ parallel (1.25.1)
46
+ parser (3.3.4.0)
47
+ ast (~> 2.4.1)
48
+ racc
49
+ playwright-ruby-client (1.54.0)
50
+ concurrent-ruby (>= 1.1.6)
51
+ mime-types (>= 3.0)
52
+ psych (5.1.2)
53
+ stringio
54
+ racc (1.8.1)
55
+ rainbow (3.1.1)
56
+ rake (13.2.1)
57
+ rdoc (6.7.0)
58
+ psych (>= 4.0.0)
59
+ regexp_parser (2.9.2)
60
+ rexml (3.3.4)
61
+ strscan
62
+ rubocop (1.64.1)
63
+ json (~> 2.3)
64
+ language_server-protocol (>= 3.17.0)
65
+ parallel (~> 1.10)
66
+ parser (>= 3.3.0.2)
67
+ rainbow (>= 2.2.2, < 4.0)
68
+ regexp_parser (>= 1.8, < 3.0)
69
+ rexml (>= 3.2.5, < 4.0)
70
+ rubocop-ast (>= 1.31.1, < 2.0)
71
+ ruby-progressbar (~> 1.7)
72
+ unicode-display_width (>= 2.4.0, < 3.0)
73
+ rubocop-ast (1.31.3)
74
+ parser (>= 3.3.1.0)
75
+ rubocop-performance (1.21.1)
76
+ rubocop (>= 1.48.1, < 2.0)
77
+ rubocop-ast (>= 1.31.1, < 2.0)
78
+ ruby-progressbar (1.13.0)
79
+ sqlite3 (2.5.0)
80
+ mini_portile2 (~> 2.8.0)
81
+ sqlite3 (2.5.0-arm64-darwin)
82
+ standard (1.39.2)
83
+ language_server-protocol (~> 3.17.0.2)
84
+ lint_roller (~> 1.0)
85
+ rubocop (~> 1.64.0)
86
+ standard-custom (~> 1.0.0)
87
+ standard-performance (~> 1.4)
88
+ standard-custom (1.0.2)
89
+ lint_roller (~> 1.0)
90
+ rubocop (~> 1.50)
91
+ standard-performance (1.4.0)
92
+ lint_roller (~> 1.1)
93
+ rubocop-performance (~> 1.21.0)
94
+ stringio (3.1.1)
95
+ strscan (3.1.0)
96
+ unicode-display_width (2.5.0)
97
+
98
+ PLATFORMS
99
+ arm64-darwin-22
100
+ ruby
101
+
102
+ DEPENDENCIES
103
+ bandcamp-discover!
104
+ minitest
105
+ rake
106
+ rdoc
107
+ standard
108
+
109
+ BUNDLED WITH
110
+ 2.5.9
@@ -0,0 +1,30 @@
1
require "json"

module BandcampDiscover
  # Decides whether a free-text description (e.g. a Bandcamp bio) reads like
  # that of a record label.
  #
  # Two strategies are used:
  # * If the OpenRouter constant is defined and has an access token configured,
  #   the description is sent to an LLM and its true/false verdict is used.
  # * Otherwise (or when the LLM reply cannot be interpreted) a simple
  #   case-insensitive keyword heuristic is applied.
  class Analyzer
    # Keywords that mark a description as label-like in the offline heuristic.
    LABEL_KEYWORDS = /label|platform|records/i

    # Instruction given to the model ahead of the description.
    # NOTE(review): the prompt never names the "answer" JSON key the parser
    # looks for, so #llm_label? also accepts a bare true/false value.
    SYSTEM_PROMPT = "You are given a description that could or could not be that of a record label. Analyze and answer with true or false only. Be critical: Individuals and bands are not labels, but collectives can be labels."

    # Models passed to OpenRouter, in the order given.
    MODELS = [
      "mistralai/mistral-small-3.2-24b-instruct:free"
    ].freeze

    # @param description [String, nil] text to classify; nil is allowed because
    #   callers may pass the result of a safe-navigation lookup.
    def initialize(description)
      @description = description
    end

    # @return [Boolean] true when the description looks like a record label's.
    #   A nil or blank description is never a label and never triggers a
    #   remote call.
    def label?
      text = @description.to_s
      return false if text.strip.empty?

      llm_configured? ? llm_label?(text) : keyword_label?(text)
    end

    private

    # True when the OpenRouter client is loaded and has an access token.
    def llm_configured?
      !!(defined?(OpenRouter) && OpenRouter.configuration.access_token)
    end

    # Offline heuristic: does the text mention any label-like keyword?
    def keyword_label?(text)
      LABEL_KEYWORDS.match?(text)
    end

    # Asks the LLM for a verdict and interprets its JSON reply.
    # Falls back to the keyword heuristic when the reply has no textual
    # content or is not valid JSON, instead of aborting the caller.
    def llm_label?(text)
      response = OpenRouter::Client.new.complete(
        [
          { role: "system", content: SYSTEM_PROMPT },
          { role: "user", content: text }
        ],
        model: MODELS,
        extras: {
          response_format: {
            type: "json_object"
          }
        }
      )

      content = response["choices"][0]["message"]["content"]
      return keyword_label?(text) unless content.is_a?(String)

      parsed = JSON.parse(content)
      answer = parsed.is_a?(Hash) ? parsed["answer"] : parsed
      # to_s so that both the JSON boolean true and the string "true" count.
      answer.to_s.strip.downcase == "true"
    rescue JSON::ParserError
      keyword_label?(text)
    end
  end
end
@@ -1,5 +1,6 @@
1
1
  require_relative "./base"
2
2
  require_relative "./music"
3
+ require_relative "../analyzer"
3
4
 
4
5
  module BandcampDiscover
5
6
  module Scrapers
@@ -15,7 +16,7 @@ module BandcampDiscover
15
16
  name = band_name_location_container.query_selector(".title").inner_text
16
17
  location = band_name_location_container.query_selector(".location").inner_text
17
18
 
18
- if force || bio_text&.inner_html =~ /label|platform|records/i
19
+ if force || Analyzer.new(bio_text&.inner_html).label?
19
20
  return Sync do
20
21
  music_tags = Scrapers::Music.new(url: "#{@url}/music", browser: @browser, max_tasks: @max_tasks).scrape
21
22
 
@@ -23,7 +23,7 @@ module BandcampDiscover
23
23
 
24
24
  album_tags = album_links.take(20).map do |album_link|
25
25
  semaphore.async do
26
- url = "#{@base_url}#{album_link[:href]}"
26
+ url = album_link[:href].start_with?("https://") ? album_link[:href] : "#{@base_url}#{album_link[:href]}"
27
27
  puts "starting to scrape #{url}"
28
28
 
29
29
  tags = Scrapers::Album.new(url: url, browser: @browser).scrape
@@ -1,3 +1,3 @@
1
1
  module BandcampDiscover
2
- VERSION = "0.1.6"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,7 +1,7 @@
1
1
  require 'async'
2
2
  require 'bandcamp-discover/version.rb'
3
+ require 'bandcamp-discover/analyzer.rb'
3
4
  require 'bandcamp-discover/scrapers/discover.rb'
4
5
 
5
6
  # Add requires for other files you add to your project here, so
6
7
  # you just need to require this one file in your bin file
7
-
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bandcamp-discover
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julian RUbisch
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-03-02 00:00:00.000000000 Z
10
+ date: 2025-08-17 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: rake
@@ -187,12 +187,14 @@ extra_rdoc_files:
187
187
  files:
188
188
  - ".gitignore"
189
189
  - Gemfile
190
+ - Gemfile.lock
190
191
  - README.rdoc
191
192
  - Rakefile
192
193
  - bandcamp-discover.gemspec
193
194
  - bandcamp-discover.rdoc
194
195
  - bin/bandcamp-discover
195
196
  - lib/bandcamp-discover.rb
197
+ - lib/bandcamp-discover/analyzer.rb
196
198
  - lib/bandcamp-discover/scrapers/album.rb
197
199
  - lib/bandcamp-discover/scrapers/base.rb
198
200
  - lib/bandcamp-discover/scrapers/discover.rb