langusta 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -16,7 +16,7 @@ Jeweler::Tasks.new do |gem|
16
16
  gem.homepage = "http://github.com/jasiek/langusta"
17
17
  gem.license = "Apache 2.0"
18
18
  gem.summary = %Q{Language detection library based on http://code.google.com/p/language-detection/.}
19
- gem.description = %Q{Uses naive bayesian filter.}
19
+ gem.description = %Q{Highly accurate language detection library, uses naive bayesian filter.}
20
20
  gem.email = "jan.szumiec@gmail.com"
21
21
  gem.authors = ["Jan Szumiec"]
22
22
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
data/langusta.gemspec CHANGED
@@ -5,13 +5,13 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{langusta}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jan Szumiec"]
12
- s.date = %q{2011-04-08}
12
+ s.date = %q{2011-04-10}
13
13
  s.default_executable = %q{langusta}
14
- s.description = %q{Uses naive bayesian filter.}
14
+ s.description = %q{Highly accurate language detection library, uses naive bayesian filter.}
15
15
  s.email = %q{jan.szumiec@gmail.com}
16
16
  s.executables = ["langusta"]
17
17
  s.extra_rdoc_files = [
data/lib/langusta.rb CHANGED
@@ -12,7 +12,7 @@ require 'oniguruma'
12
12
  require 'yajl'
13
13
 
14
14
  module Langusta
15
- VERSION = '0.1.0'
15
+ VERSION = '0.1.1'
16
16
 
17
17
  autoload :RegexHelper, 'langusta/regex_helper'
18
18
  autoload :UCS2String, 'langusta/ucs2_string'
@@ -32,5 +32,9 @@ module Langusta
32
32
  PROFILES_PATH = File.join(ABSOLUTE_PATH, 'profiles')
33
33
  UPPERCASE_BIN = File.join(ABSOLUTE_PATH, 'data/uppercase.bin')
34
34
  MESSAGES_PROPERTIES = File.join(ABSOLUTE_PATH, 'data/messages.properties')
35
+
36
+ class DuplicateProfilesError < StandardError; end
37
+ class NoProfilesLoadedError < StandardError; end
38
+ class NoFeaturesInTextError < StandardError; end
35
39
  end
36
40
 
@@ -45,7 +45,7 @@ module Langusta
45
45
  def detect_block
46
46
  cleaning_text()
47
47
  ngrams = extract_ngrams()
48
- raise "no features in text" if ngrams.empty?
48
+ raise NoFeaturesInTextError if ngrams.empty?
49
49
  @langprob = Array.new(@lang_list.length, 0.0)
50
50
 
51
51
  @n_trial.times do
@@ -68,6 +68,7 @@ module Langusta
68
68
  end
69
69
  end
70
70
 
71
+ # TODO: this looks like it's not referenced anywhere.
71
72
  def set_prior_map(prior_map)
72
73
  @prior_map = Array.new[@lang_list.length]
73
74
  sump = 0.0
@@ -99,7 +100,6 @@ module Langusta
99
100
  maxp
100
101
  end
101
102
 
102
- private
103
103
  def cleaning_text
104
104
  non_latin_count = latin_count = 0
105
105
  @text.each_char do |c|
@@ -1,6 +1,4 @@
1
1
  module Langusta
2
- class LangDetectException < StandardError; end
3
-
4
2
  class DetectorFactory
5
3
  attr_reader :word_lang_prob_map, :lang_list
6
4
 
@@ -14,7 +12,7 @@ module Langusta
14
12
  # @param [Fixnum] index at which the language profile is to be added.
15
13
  # @param [Fixnum] langsize how many language profiles are to be added to this factory in total.
16
14
  def add_profile(profile, index, langsize)
17
- raise LangDetectException.new("duplicate the same language profile") if @lang_list.include?(profile.name)
15
+ raise DuplicateProfilesError.new(profile.name) if @lang_list.include?(profile.name)
18
16
  @lang_list << profile.name
19
17
  profile.freq.keys.each do |word|
20
18
  if not @word_lang_prob_map.has_key?(word)
@@ -39,7 +37,7 @@ module Langusta
39
37
 
40
38
  private
41
39
  def create_detector
42
- raise LangDetectException.new("need to load profiles") if @lang_list.length == 0
40
+ raise NoProfilesLoadedError if @lang_list.empty?
43
41
  detector = Detector.new(self)
44
42
  end
45
43
  end
@@ -49,4 +49,12 @@ class DetectorTest < Test::Unit::TestCase
49
49
  detector.append(UCS2String.new("\x30\x42\x30\x42\x30\x42\x30\x42\x00a"))
50
50
  assert_equal("jp", detector.detect())
51
51
  end
52
+
53
+ def test_exceptions
54
+ detector = @factory.create()
55
+ detector.append(UCS2String.new(''))
56
+ assert_raises(NoFeaturesInTextError) do
57
+ detector.detect()
58
+ end
59
+ end
52
60
  end
@@ -6,11 +6,23 @@ class DetectorFactoryTest < Test::Unit::TestCase
6
6
  factory = DetectorFactory.new
7
7
 
8
8
  factory.add_profile(profile, 0, 1)
9
- assert_raises(LangDetectException) do
10
- factory.add_profile(profile, 1, 1)
11
- end
12
9
 
13
10
  detector = factory.create(0.123)
14
11
  assert_equal(0.123, detector.alpha)
15
12
  end
13
+
14
+ def test_exceptions
15
+ profile = LangProfile.new
16
+ factory = DetectorFactory.new
17
+
18
+ assert_raises(NoProfilesLoadedError) do
19
+ factory.create()
20
+ end
21
+
22
+ factory.add_profile(profile, 0, 2)
23
+
24
+ assert_raises(DuplicateProfilesError) do
25
+ factory.add_profile(profile, 1, 2)
26
+ end
27
+ end
16
28
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langusta
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 0
10
- version: 0.1.0
9
+ - 1
10
+ version: 0.1.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jan Szumiec
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-08 00:00:00 +02:00
18
+ date: 2011-04-10 00:00:00 +02:00
19
19
  default_executable: langusta
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -124,7 +124,7 @@ dependencies:
124
124
  name: ruby-debug
125
125
  version_requirements: *id007
126
126
  prerelease: false
127
- description: Uses naive bayesian filter.
127
+ description: Highly accurate language detection library, uses naive bayesian filter.
128
128
  email: jan.szumiec@gmail.com
129
129
  executables:
130
130
  - langusta