bandcamp-discover 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +110 -0
- data/lib/bandcamp-discover/analyzer.rb +30 -0
- data/lib/bandcamp-discover/scrapers/label.rb +2 -1
- data/lib/bandcamp-discover/scrapers/music.rb +1 -1
- data/lib/bandcamp-discover/version.rb +1 -1
- data/lib/bandcamp-discover.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a65e9bf97cd9ca459ed51bfebb55d81262de863b6e291187d6b41d004ae2e863
|
4
|
+
data.tar.gz: b8bd036e56d9276480966cdabb0ef49eda334d325559cd41cde9cf54c3b7e0d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2ef36b3ff546e648f9029da0e42cc4d7fbaced58112156788f30255bbf3ff595448030d827e608fb5ff596d79b3734394de734ca983a6397341a45c6015c1a5
|
7
|
+
data.tar.gz: 622f759d307cf57357b278c77f2310f1917281a459e6b15856c98811be1926a9344556cf08f4681b80cddc9632239192217d5c67d3184251da001386a25a1bb8
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
bandcamp-discover (0.1.7)
|
5
|
+
async
|
6
|
+
base64
|
7
|
+
concurrent-ruby
|
8
|
+
gli (~> 2.21.5)
|
9
|
+
logger
|
10
|
+
ostruct
|
11
|
+
playwright-ruby-client
|
12
|
+
sqlite3
|
13
|
+
|
14
|
+
GEM
|
15
|
+
remote: https://rubygems.org/
|
16
|
+
specs:
|
17
|
+
ast (2.4.2)
|
18
|
+
async (2.15.3)
|
19
|
+
console (~> 1.26)
|
20
|
+
fiber-annotation
|
21
|
+
io-event (~> 1.6, >= 1.6.5)
|
22
|
+
base64 (0.2.0)
|
23
|
+
concurrent-ruby (1.3.5)
|
24
|
+
console (1.27.0)
|
25
|
+
fiber-annotation
|
26
|
+
fiber-local (~> 1.1)
|
27
|
+
json
|
28
|
+
fiber-annotation (0.2.0)
|
29
|
+
fiber-local (1.1.0)
|
30
|
+
fiber-storage
|
31
|
+
fiber-storage (1.0.0)
|
32
|
+
gli (2.21.5)
|
33
|
+
io-event (1.6.5)
|
34
|
+
json (2.7.2)
|
35
|
+
language_server-protocol (3.17.0.3)
|
36
|
+
lint_roller (1.1.0)
|
37
|
+
logger (1.7.0)
|
38
|
+
mime-types (3.7.0)
|
39
|
+
logger
|
40
|
+
mime-types-data (~> 3.2025, >= 3.2025.0507)
|
41
|
+
mime-types-data (3.2025.0729)
|
42
|
+
mini_portile2 (2.8.7)
|
43
|
+
minitest (5.24.1)
|
44
|
+
ostruct (0.6.1)
|
45
|
+
parallel (1.25.1)
|
46
|
+
parser (3.3.4.0)
|
47
|
+
ast (~> 2.4.1)
|
48
|
+
racc
|
49
|
+
playwright-ruby-client (1.54.0)
|
50
|
+
concurrent-ruby (>= 1.1.6)
|
51
|
+
mime-types (>= 3.0)
|
52
|
+
psych (5.1.2)
|
53
|
+
stringio
|
54
|
+
racc (1.8.1)
|
55
|
+
rainbow (3.1.1)
|
56
|
+
rake (13.2.1)
|
57
|
+
rdoc (6.7.0)
|
58
|
+
psych (>= 4.0.0)
|
59
|
+
regexp_parser (2.9.2)
|
60
|
+
rexml (3.3.4)
|
61
|
+
strscan
|
62
|
+
rubocop (1.64.1)
|
63
|
+
json (~> 2.3)
|
64
|
+
language_server-protocol (>= 3.17.0)
|
65
|
+
parallel (~> 1.10)
|
66
|
+
parser (>= 3.3.0.2)
|
67
|
+
rainbow (>= 2.2.2, < 4.0)
|
68
|
+
regexp_parser (>= 1.8, < 3.0)
|
69
|
+
rexml (>= 3.2.5, < 4.0)
|
70
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
71
|
+
ruby-progressbar (~> 1.7)
|
72
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
73
|
+
rubocop-ast (1.31.3)
|
74
|
+
parser (>= 3.3.1.0)
|
75
|
+
rubocop-performance (1.21.1)
|
76
|
+
rubocop (>= 1.48.1, < 2.0)
|
77
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
78
|
+
ruby-progressbar (1.13.0)
|
79
|
+
sqlite3 (2.5.0)
|
80
|
+
mini_portile2 (~> 2.8.0)
|
81
|
+
sqlite3 (2.5.0-arm64-darwin)
|
82
|
+
standard (1.39.2)
|
83
|
+
language_server-protocol (~> 3.17.0.2)
|
84
|
+
lint_roller (~> 1.0)
|
85
|
+
rubocop (~> 1.64.0)
|
86
|
+
standard-custom (~> 1.0.0)
|
87
|
+
standard-performance (~> 1.4)
|
88
|
+
standard-custom (1.0.2)
|
89
|
+
lint_roller (~> 1.0)
|
90
|
+
rubocop (~> 1.50)
|
91
|
+
standard-performance (1.4.0)
|
92
|
+
lint_roller (~> 1.1)
|
93
|
+
rubocop-performance (~> 1.21.0)
|
94
|
+
stringio (3.1.1)
|
95
|
+
strscan (3.1.0)
|
96
|
+
unicode-display_width (2.5.0)
|
97
|
+
|
98
|
+
PLATFORMS
|
99
|
+
arm64-darwin-22
|
100
|
+
ruby
|
101
|
+
|
102
|
+
DEPENDENCIES
|
103
|
+
bandcamp-discover!
|
104
|
+
minitest
|
105
|
+
rake
|
106
|
+
rdoc
|
107
|
+
standard
|
108
|
+
|
109
|
+
BUNDLED WITH
|
110
|
+
2.5.9
|
data/lib/bandcamp-discover/analyzer.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module BandcampDiscover
|
2
|
+
class Analyzer
|
3
|
+
def initialize(description)
|
4
|
+
@description = description
|
5
|
+
end
|
6
|
+
|
7
|
+
def label?
|
8
|
+
if defined?(OpenRouter) && !!OpenRouter.configuration.access_token
|
9
|
+
response = OpenRouter::Client.new.complete(
|
10
|
+
[
|
11
|
+
{ role: "system", content: "You are given a description that could or could not be that of a record label. Analyze and answer with true or false only. Be critical: Individuals and bands are not labels, but collectives can be labels." },
|
12
|
+
{ role: "user", content: @description }
|
13
|
+
],
|
14
|
+
model: [
|
15
|
+
"mistralai/mistral-small-3.2-24b-instruct:free"
|
16
|
+
],
|
17
|
+
extras: {
|
18
|
+
response_format: {
|
19
|
+
type: "json_object"
|
20
|
+
}
|
21
|
+
}
|
22
|
+
)
|
23
|
+
|
24
|
+
JSON.parse(response["choices"][0]["message"]["content"])["answer"]&.downcase == "true"
|
25
|
+
else
|
26
|
+
@description.match? /label|platform|records/i
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/bandcamp-discover/scrapers/label.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require_relative "./base"
|
2
2
|
require_relative "./music"
|
3
|
+
require_relative "../analyzer"
|
3
4
|
|
4
5
|
module BandcampDiscover
|
5
6
|
module Scrapers
|
@@ -15,7 +16,7 @@ module BandcampDiscover
|
|
15
16
|
name = band_name_location_container.query_selector(".title").inner_text
|
16
17
|
location = band_name_location_container.query_selector(".location").inner_text
|
17
18
|
|
18
|
-
if force || bio_text&.inner_html
|
19
|
+
if force || Analyzer.new(bio_text&.inner_html).label?
|
19
20
|
return Sync do
|
20
21
|
music_tags = Scrapers::Music.new(url: "#{@url}/music", browser: @browser, max_tasks: @max_tasks).scrape
|
21
22
|
|
data/lib/bandcamp-discover/scrapers/music.rb
CHANGED
@@ -23,7 +23,7 @@ module BandcampDiscover
|
|
23
23
|
|
24
24
|
album_tags = album_links.take(20).map do |album_link|
|
25
25
|
semaphore.async do
|
26
|
-
url = "#{@base_url}#{album_link[:href]}"
|
26
|
+
url = album_link[:href].start_with?("https://") ? album_link[:href] : "#{@base_url}#{album_link[:href]}"
|
27
27
|
puts "starting to scrape #{url}"
|
28
28
|
|
29
29
|
tags = Scrapers::Album.new(url: url, browser: @browser).scrape
|
data/lib/bandcamp-discover.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'async'
|
2
2
|
require 'bandcamp-discover/version.rb'
|
3
|
+
require 'bandcamp-discover/analyzer.rb'
|
3
4
|
require 'bandcamp-discover/scrapers/discover.rb'
|
4
5
|
|
5
6
|
# Add requires for other files you add to your project here, so
|
6
7
|
# you just need to require this one file in your bin file
|
7
|
-
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bandcamp-discover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julian RUbisch
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-08-17 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: rake
|
@@ -187,12 +187,14 @@ extra_rdoc_files:
|
|
187
187
|
files:
|
188
188
|
- ".gitignore"
|
189
189
|
- Gemfile
|
190
|
+
- Gemfile.lock
|
190
191
|
- README.rdoc
|
191
192
|
- Rakefile
|
192
193
|
- bandcamp-discover.gemspec
|
193
194
|
- bandcamp-discover.rdoc
|
194
195
|
- bin/bandcamp-discover
|
195
196
|
- lib/bandcamp-discover.rb
|
197
|
+
- lib/bandcamp-discover/analyzer.rb
|
196
198
|
- lib/bandcamp-discover/scrapers/album.rb
|
197
199
|
- lib/bandcamp-discover/scrapers/base.rb
|
198
200
|
- lib/bandcamp-discover/scrapers/discover.rb
|