sweeper 0.2.4 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/CHANGELOG +2 -0
- data/lib/sweeper.rb +38 -14
- data/sweeper.gemspec +5 -5
- data/test/integration/sweeper_test.rb +7 -0
- metadata +3 -3
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/CHANGELOG
CHANGED
data/lib/sweeper.rb
CHANGED
@@ -5,7 +5,9 @@ require 'xsd/mapping'
|
|
5
5
|
require 'activesupport'
|
6
6
|
require 'open-uri'
|
7
7
|
require 'uri'
|
8
|
-
require '
|
8
|
+
require 'Text'
|
9
|
+
|
10
|
+
require 'ruby-debug' if ENV['DEBUG']
|
9
11
|
|
10
12
|
class ID3Lib::Tag
|
11
13
|
def url
|
@@ -36,8 +38,8 @@ class Sweeper
|
|
36
38
|
def initialize(options = {})
|
37
39
|
@dir = File.expand_path(options['dir'] || Dir.pwd)
|
38
40
|
@options = options
|
39
|
-
@
|
40
|
-
@
|
41
|
+
@errf = Tempfile.new("stderr")
|
42
|
+
@match_cache = {}
|
41
43
|
end
|
42
44
|
|
43
45
|
# Run the Sweeper according to the <tt>options</tt>.
|
@@ -94,7 +96,7 @@ class Sweeper
|
|
94
96
|
|
95
97
|
rescue Problem => e
|
96
98
|
tries += 1 and retry if tries < 2
|
97
|
-
puts "Skipped (#{e.message.gsub(
|
99
|
+
puts "Skipped (#{e.message.gsub("\n", " ")}): #{File.basename(filename)}"
|
98
100
|
@failed += 1
|
99
101
|
end
|
100
102
|
end
|
@@ -194,23 +196,19 @@ class Sweeper
|
|
194
196
|
|
195
197
|
primary = nil
|
196
198
|
genres.each_with_index do |this, index|
|
197
|
-
|
198
|
-
# Get the levenshtein best-match weight
|
199
|
-
max = match_results.max
|
200
|
-
# Reverse lookup the canonical genre
|
201
|
-
match = GENRES[match_results.index(max)]
|
199
|
+
match, weight = nearest_genre(this)
|
202
200
|
# Bias slightly towards higher tagging counts
|
203
|
-
|
201
|
+
weight += ((GENRE_COUNT - index) / GENRE_COUNT / 4.0)
|
204
202
|
|
205
203
|
if ['Rock', 'Pop', 'Rap'].include? match
|
206
204
|
# Penalize useless genres
|
207
|
-
|
205
|
+
weight = weight / 3.0
|
208
206
|
end
|
209
207
|
|
210
|
-
p [
|
208
|
+
p [weight, match] if ENV['DEBUG']
|
211
209
|
|
212
|
-
if !primary or primary.first <
|
213
|
-
primary = [
|
210
|
+
if !primary or primary.first < weight
|
211
|
+
primary = [weight, match]
|
214
212
|
end
|
215
213
|
end
|
216
214
|
|
@@ -255,4 +253,30 @@ class Sweeper
|
|
255
253
|
ID3Lib::Tag.new(filename, ID3Lib::V_ALL)
|
256
254
|
end
|
257
255
|
|
256
|
+
def nearest_genre(string)
|
257
|
+
@match_cache[string] ||= begin
|
258
|
+
results = {}
|
259
|
+
GENRES.each do |genre|
|
260
|
+
results[Text::Levenshtein.distance(genre, string)] = genre
|
261
|
+
end
|
262
|
+
min = results.keys.min
|
263
|
+
match = results[min]
|
264
|
+
|
265
|
+
[match, normalize(match, string, min)]
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
def normalize(genre, string, weight)
|
270
|
+
# XXX Algorithm may not be right
|
271
|
+
if weight == 0
|
272
|
+
1.0
|
273
|
+
elsif weight >= genre.size
|
274
|
+
0.0
|
275
|
+
elsif genre.size >= string.size
|
276
|
+
1.0 - (weight / genre.size.to_f)
|
277
|
+
else
|
278
|
+
1.0 - (weight / string.size.to_f)
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
258
282
|
end
|
data/sweeper.gemspec
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
|
2
|
-
# Gem::Specification for Sweeper-0.
|
2
|
+
# Gem::Specification for Sweeper-0.3
|
3
3
|
# Originally generated by Echoe
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = %q{sweeper}
|
7
|
-
s.version = "0.
|
7
|
+
s.version = "0.3"
|
8
8
|
|
9
9
|
s.specification_version = 2 if s.respond_to? :specification_version=
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.authors = ["Evan Weaver"]
|
13
|
-
s.date = %q{2008-04-
|
13
|
+
s.date = %q{2008-04-16}
|
14
14
|
s.default_executable = %q{sweeper}
|
15
15
|
s.description = %q{Automatically tag your music collection with metadata from Last.fm.}
|
16
16
|
s.email = %q{}
|
@@ -29,7 +29,7 @@ Gem::Specification.new do |s|
|
|
29
29
|
|
30
30
|
s.add_dependency(%q<id3lib-ruby>, [">= 0"])
|
31
31
|
s.add_dependency(%q<choice>, [">= 0"])
|
32
|
-
s.add_dependency(%q<
|
32
|
+
s.add_dependency(%q<Text>, [">= 0"])
|
33
33
|
s.add_dependency(%q<activesupport>, [">= 0"])
|
34
34
|
end
|
35
35
|
|
@@ -44,7 +44,7 @@ end
|
|
44
44
|
# p.summary = "Automatically tag your music collection with metadata from Last.fm."
|
45
45
|
# p.url = "http://blog.evanweaver.com/files/doc/fauna/sweeper/"
|
46
46
|
# p.docs_host = "blog.evanweaver.com:~/www/bax/public/files/doc/"
|
47
|
-
# p.dependencies = ['id3lib-ruby', 'choice', '
|
47
|
+
# p.dependencies = ['id3lib-ruby', 'choice', 'Text', 'activesupport']
|
48
48
|
# p.clean_pattern = ['doc', 'pkg', 'test/integration/songs']
|
49
49
|
# p.rdoc_pattern = ['README', 'LICENSE', 'CHANGELOG', 'TODO', 'lib/*']
|
50
50
|
# end
|
@@ -41,6 +41,13 @@ class SweeperTest < Test::Unit::TestCase
|
|
41
41
|
)
|
42
42
|
end
|
43
43
|
|
44
|
+
def test_genre_weighting
|
45
|
+
match, weight = @s.nearest_genre('psychedelic')
|
46
|
+
assert_equal match, 'Psychadelic'
|
47
|
+
assert(weight < 1)
|
48
|
+
assert(weight > 0)
|
49
|
+
end
|
50
|
+
|
44
51
|
def test_read
|
45
52
|
assert_equal({},
|
46
53
|
@s.read(@found_many))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sweeper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: "0.3"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Evan Weaver
|
@@ -30,7 +30,7 @@ cert_chain:
|
|
30
30
|
yZ0=
|
31
31
|
-----END CERTIFICATE-----
|
32
32
|
|
33
|
-
date: 2008-04-
|
33
|
+
date: 2008-04-16 00:00:00 -04:00
|
34
34
|
default_executable:
|
35
35
|
dependencies:
|
36
36
|
- !ruby/object:Gem::Dependency
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
version: "0"
|
53
53
|
version:
|
54
54
|
- !ruby/object:Gem::Dependency
|
55
|
-
name:
|
55
|
+
name: Text
|
56
56
|
version_requirement:
|
57
57
|
version_requirements: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
metadata.gz.sig
CHANGED
Binary file
|