sweeper 0.2.4 → 0.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
data.tar.gz.sig CHANGED
Binary file
data/CHANGELOG CHANGED
@@ -1,4 +1,6 @@
1
1
 
2
+ v0.3. Pure Ruby Levenshtein algorithm, for Windows support.
3
+
2
4
  v0.2.4. More robust XSD error handling.
3
5
 
4
6
  v0.2.3. Rescue another OpenURI error.
@@ -5,7 +5,9 @@ require 'xsd/mapping'
5
5
  require 'activesupport'
6
6
  require 'open-uri'
7
7
  require 'uri'
8
- require 'amatch'
8
+ require 'Text'
9
+
10
+ require 'ruby-debug' if ENV['DEBUG']
9
11
 
10
12
  class ID3Lib::Tag
11
13
  def url
@@ -36,8 +38,8 @@ class Sweeper
36
38
  def initialize(options = {})
37
39
  @dir = File.expand_path(options['dir'] || Dir.pwd)
38
40
  @options = options
39
- @outf = Tempfile.new("stdout")
40
- @errf = Tempfile.new("stderr")
41
+ @errf = Tempfile.new("stderr")
42
+ @match_cache = {}
41
43
  end
42
44
 
43
45
  # Run the Sweeper according to the <tt>options</tt>.
@@ -94,7 +96,7 @@ class Sweeper
94
96
 
95
97
  rescue Problem => e
96
98
  tries += 1 and retry if tries < 2
97
- puts "Skipped (#{e.message.gsub('\n', ' ')}): #{File.basename(filename)}"
99
+ puts "Skipped (#{e.message.gsub("\n", " ")}): #{File.basename(filename)}"
98
100
  @failed += 1
99
101
  end
100
102
  end
@@ -194,23 +196,19 @@ class Sweeper
194
196
 
195
197
  primary = nil
196
198
  genres.each_with_index do |this, index|
197
- match_results = Amatch::Levenshtein.new(this).similar(GENRES)
198
- # Get the levenshtein best-match weight
199
- max = match_results.max
200
- # Reverse lookup the canonical genre
201
- match = GENRES[match_results.index(max)]
199
+ match, weight = nearest_genre(this)
202
200
  # Bias slightly towards higher tagging counts
203
- max += ((GENRE_COUNT - index) / GENRE_COUNT / 4.0)
201
+ weight += ((GENRE_COUNT - index) / GENRE_COUNT / 4.0)
204
202
 
205
203
  if ['Rock', 'Pop', 'Rap'].include? match
206
204
  # Penalize useless genres
207
- max = max / 3.0
205
+ weight = weight / 3.0
208
206
  end
209
207
 
210
- p [max, match] if ENV['DEBUG']
208
+ p [weight, match] if ENV['DEBUG']
211
209
 
212
- if !primary or primary.first < max
213
- primary = [max, match]
210
+ if !primary or primary.first < weight
211
+ primary = [weight, match]
214
212
  end
215
213
  end
216
214
 
@@ -255,4 +253,30 @@ class Sweeper
255
253
  ID3Lib::Tag.new(filename, ID3Lib::V_ALL)
256
254
  end
257
255
 
256
+ def nearest_genre(string)
257
+ @match_cache[string] ||= begin
258
+ results = {}
259
+ GENRES.each do |genre|
260
+ results[Text::Levenshtein.distance(genre, string)] = genre
261
+ end
262
+ min = results.keys.min
263
+ match = results[min]
264
+
265
+ [match, normalize(match, string, min)]
266
+ end
267
+ end
268
+
269
+ def normalize(genre, string, weight)
270
+ # XXX Algorithm may not be right
271
+ if weight == 0
272
+ 1.0
273
+ elsif weight >= genre.size
274
+ 0.0
275
+ elsif genre.size >= string.size
276
+ 1.0 - (weight / genre.size.to_f)
277
+ else
278
+ 1.0 - (weight / string.size.to_f)
279
+ end
280
+ end
281
+
258
282
  end
@@ -1,16 +1,16 @@
1
1
 
2
- # Gem::Specification for Sweeper-0.2.4
2
+ # Gem::Specification for Sweeper-0.3
3
3
  # Originally generated by Echoe
4
4
 
5
5
  Gem::Specification.new do |s|
6
6
  s.name = %q{sweeper}
7
- s.version = "0.2.4"
7
+ s.version = "0.3"
8
8
 
9
9
  s.specification_version = 2 if s.respond_to? :specification_version=
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.authors = ["Evan Weaver"]
13
- s.date = %q{2008-04-13}
13
+ s.date = %q{2008-04-16}
14
14
  s.default_executable = %q{sweeper}
15
15
  s.description = %q{Automatically tag your music collection with metadata from Last.fm.}
16
16
  s.email = %q{}
@@ -29,7 +29,7 @@ Gem::Specification.new do |s|
29
29
 
30
30
  s.add_dependency(%q<id3lib-ruby>, [">= 0"])
31
31
  s.add_dependency(%q<choice>, [">= 0"])
32
- s.add_dependency(%q<amatch>, [">= 0"])
32
+ s.add_dependency(%q<Text>, [">= 0"])
33
33
  s.add_dependency(%q<activesupport>, [">= 0"])
34
34
  end
35
35
 
@@ -44,7 +44,7 @@ end
44
44
  # p.summary = "Automatically tag your music collection with metadata from Last.fm."
45
45
  # p.url = "http://blog.evanweaver.com/files/doc/fauna/sweeper/"
46
46
  # p.docs_host = "blog.evanweaver.com:~/www/bax/public/files/doc/"
47
- # p.dependencies = ['id3lib-ruby', 'choice', 'amatch', 'activesupport']
47
+ # p.dependencies = ['id3lib-ruby', 'choice', 'Text', 'activesupport']
48
48
  # p.clean_pattern = ['doc', 'pkg', 'test/integration/songs']
49
49
  # p.rdoc_pattern = ['README', 'LICENSE', 'CHANGELOG', 'TODO', 'lib/*']
50
50
  # end
@@ -41,6 +41,13 @@ class SweeperTest < Test::Unit::TestCase
41
41
  )
42
42
  end
43
43
 
44
+ def test_genre_weighting
45
+ match, weight = @s.nearest_genre('psychedelic')
46
+ assert_equal match, 'Psychadelic'
47
+ assert(weight < 1)
48
+ assert(weight > 0)
49
+ end
50
+
44
51
  def test_read
45
52
  assert_equal({},
46
53
  @s.read(@found_many))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sweeper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: "0.3"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Evan Weaver
@@ -30,7 +30,7 @@ cert_chain:
30
30
  yZ0=
31
31
  -----END CERTIFICATE-----
32
32
 
33
- date: 2008-04-13 00:00:00 -04:00
33
+ date: 2008-04-16 00:00:00 -04:00
34
34
  default_executable:
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
@@ -52,7 +52,7 @@ dependencies:
52
52
  version: "0"
53
53
  version:
54
54
  - !ruby/object:Gem::Dependency
55
- name: amatch
55
+ name: Text
56
56
  version_requirement:
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
metadata.gz.sig CHANGED
Binary file