sweeper 0.2.4 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig CHANGED
Binary file
data/CHANGELOG CHANGED
@@ -1,4 +1,6 @@
1
1
 
2
+ v0.3. Pure Ruby Levenshtein algorithm, for Windows support.
3
+
2
4
  v0.2.4. More robust XSD error handling.
3
5
 
4
6
  v0.2.3. Rescue another OpenURI error.
@@ -5,7 +5,9 @@ require 'xsd/mapping'
5
5
  require 'activesupport'
6
6
  require 'open-uri'
7
7
  require 'uri'
8
- require 'amatch'
8
+ require 'Text'
9
+
10
+ require 'ruby-debug' if ENV['DEBUG']
9
11
 
10
12
  class ID3Lib::Tag
11
13
  def url
@@ -36,8 +38,8 @@ class Sweeper
36
38
  def initialize(options = {})
37
39
  @dir = File.expand_path(options['dir'] || Dir.pwd)
38
40
  @options = options
39
- @outf = Tempfile.new("stdout")
40
- @errf = Tempfile.new("stderr")
41
+ @errf = Tempfile.new("stderr")
42
+ @match_cache = {}
41
43
  end
42
44
 
43
45
  # Run the Sweeper according to the <tt>options</tt>.
@@ -94,7 +96,7 @@ class Sweeper
94
96
 
95
97
  rescue Problem => e
96
98
  tries += 1 and retry if tries < 2
97
- puts "Skipped (#{e.message.gsub('\n', ' ')}): #{File.basename(filename)}"
99
+ puts "Skipped (#{e.message.gsub("\n", " ")}): #{File.basename(filename)}"
98
100
  @failed += 1
99
101
  end
100
102
  end
@@ -194,23 +196,19 @@ class Sweeper
194
196
 
195
197
  primary = nil
196
198
  genres.each_with_index do |this, index|
197
- match_results = Amatch::Levenshtein.new(this).similar(GENRES)
198
- # Get the levenshtein best-match weight
199
- max = match_results.max
200
- # Reverse lookup the canonical genre
201
- match = GENRES[match_results.index(max)]
199
+ match, weight = nearest_genre(this)
202
200
  # Bias slightly towards higher tagging counts
203
- max += ((GENRE_COUNT - index) / GENRE_COUNT / 4.0)
201
+ weight += ((GENRE_COUNT - index) / GENRE_COUNT / 4.0)
204
202
 
205
203
  if ['Rock', 'Pop', 'Rap'].include? match
206
204
  # Penalize useless genres
207
- max = max / 3.0
205
+ weight = weight / 3.0
208
206
  end
209
207
 
210
- p [max, match] if ENV['DEBUG']
208
+ p [weight, match] if ENV['DEBUG']
211
209
 
212
- if !primary or primary.first < max
213
- primary = [max, match]
210
+ if !primary or primary.first < weight
211
+ primary = [weight, match]
214
212
  end
215
213
  end
216
214
 
@@ -255,4 +253,30 @@ class Sweeper
255
253
  ID3Lib::Tag.new(filename, ID3Lib::V_ALL)
256
254
  end
257
255
 
256
+ def nearest_genre(string)
257
+ @match_cache[string] ||= begin
258
+ results = {}
259
+ GENRES.each do |genre|
260
+ results[Text::Levenshtein.distance(genre, string)] = genre
261
+ end
262
+ min = results.keys.min
263
+ match = results[min]
264
+
265
+ [match, normalize(match, string, min)]
266
+ end
267
+ end
268
+
269
+ def normalize(genre, string, weight)
270
+ # XXX Algorithm may not be right
271
+ if weight == 0
272
+ 1.0
273
+ elsif weight >= genre.size
274
+ 0.0
275
+ elsif genre.size >= string.size
276
+ 1.0 - (weight / genre.size.to_f)
277
+ else
278
+ 1.0 - (weight / string.size.to_f)
279
+ end
280
+ end
281
+
258
282
  end
@@ -1,16 +1,16 @@
1
1
 
2
- # Gem::Specification for Sweeper-0.2.4
2
+ # Gem::Specification for Sweeper-0.3
3
3
  # Originally generated by Echoe
4
4
 
5
5
  Gem::Specification.new do |s|
6
6
  s.name = %q{sweeper}
7
- s.version = "0.2.4"
7
+ s.version = "0.3"
8
8
 
9
9
  s.specification_version = 2 if s.respond_to? :specification_version=
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.authors = ["Evan Weaver"]
13
- s.date = %q{2008-04-13}
13
+ s.date = %q{2008-04-16}
14
14
  s.default_executable = %q{sweeper}
15
15
  s.description = %q{Automatically tag your music collection with metadata from Last.fm.}
16
16
  s.email = %q{}
@@ -29,7 +29,7 @@ Gem::Specification.new do |s|
29
29
 
30
30
  s.add_dependency(%q<id3lib-ruby>, [">= 0"])
31
31
  s.add_dependency(%q<choice>, [">= 0"])
32
- s.add_dependency(%q<amatch>, [">= 0"])
32
+ s.add_dependency(%q<Text>, [">= 0"])
33
33
  s.add_dependency(%q<activesupport>, [">= 0"])
34
34
  end
35
35
 
@@ -44,7 +44,7 @@ end
44
44
  # p.summary = "Automatically tag your music collection with metadata from Last.fm."
45
45
  # p.url = "http://blog.evanweaver.com/files/doc/fauna/sweeper/"
46
46
  # p.docs_host = "blog.evanweaver.com:~/www/bax/public/files/doc/"
47
- # p.dependencies = ['id3lib-ruby', 'choice', 'amatch', 'activesupport']
47
+ # p.dependencies = ['id3lib-ruby', 'choice', 'Text', 'activesupport']
48
48
  # p.clean_pattern = ['doc', 'pkg', 'test/integration/songs']
49
49
  # p.rdoc_pattern = ['README', 'LICENSE', 'CHANGELOG', 'TODO', 'lib/*']
50
50
  # end
@@ -41,6 +41,13 @@ class SweeperTest < Test::Unit::TestCase
41
41
  )
42
42
  end
43
43
 
44
+ def test_genre_weighting
45
+ match, weight = @s.nearest_genre('psychedelic')
46
+ assert_equal match, 'Psychadelic'
47
+ assert(weight < 1)
48
+ assert(weight > 0)
49
+ end
50
+
44
51
  def test_read
45
52
  assert_equal({},
46
53
  @s.read(@found_many))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sweeper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: "0.3"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Evan Weaver
@@ -30,7 +30,7 @@ cert_chain:
30
30
  yZ0=
31
31
  -----END CERTIFICATE-----
32
32
 
33
- date: 2008-04-13 00:00:00 -04:00
33
+ date: 2008-04-16 00:00:00 -04:00
34
34
  default_executable:
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
@@ -52,7 +52,7 @@ dependencies:
52
52
  version: "0"
53
53
  version:
54
54
  - !ruby/object:Gem::Dependency
55
- name: amatch
55
+ name: Text
56
56
  version_requirement:
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
metadata.gz.sig CHANGED
Binary file