langusta 0.1.0 → 0.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -16,7 +16,7 @@ Jeweler::Tasks.new do |gem|
16
16
  gem.homepage = "http://github.com/jasiek/langusta"
17
17
  gem.license = "Apache 2.0"
18
18
  gem.summary = %Q{Language detection library based on http://code.google.com/p/language-detection/.}
19
- gem.description = %Q{Uses naive bayesian filter.}
19
+ gem.description = %Q{Highly accurate language detection library, uses naive bayesian filter.}
20
20
  gem.email = "jan.szumiec@gmail.com"
21
21
  gem.authors = ["Jan Szumiec"]
22
22
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
data/langusta.gemspec CHANGED
@@ -5,13 +5,13 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{langusta}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jan Szumiec"]
12
- s.date = %q{2011-04-08}
12
+ s.date = %q{2011-04-10}
13
13
  s.default_executable = %q{langusta}
14
- s.description = %q{Uses naive bayesian filter.}
14
+ s.description = %q{Highly accurate language detection library, uses naive bayesian filter.}
15
15
  s.email = %q{jan.szumiec@gmail.com}
16
16
  s.executables = ["langusta"]
17
17
  s.extra_rdoc_files = [
data/lib/langusta.rb CHANGED
@@ -12,7 +12,7 @@ require 'oniguruma'
12
12
  require 'yajl'
13
13
 
14
14
  module Langusta
15
- VERSION = '0.1.0'
15
+ VERSION = '0.1.1'
16
16
 
17
17
  autoload :RegexHelper, 'langusta/regex_helper'
18
18
  autoload :UCS2String, 'langusta/ucs2_string'
@@ -32,5 +32,9 @@ module Langusta
32
32
  PROFILES_PATH = File.join(ABSOLUTE_PATH, 'profiles')
33
33
  UPPERCASE_BIN = File.join(ABSOLUTE_PATH, 'data/uppercase.bin')
34
34
  MESSAGES_PROPERTIES = File.join(ABSOLUTE_PATH, 'data/messages.properties')
35
+
36
+ class DuplicateProfilesError < StandardError; end
37
+ class NoProfilesLoadedError < StandardError; end
38
+ class NoFeaturesInTextError < StandardError; end
35
39
  end
36
40
 
@@ -45,7 +45,7 @@ module Langusta
45
45
  def detect_block
46
46
  cleaning_text()
47
47
  ngrams = extract_ngrams()
48
- raise "no features in text" if ngrams.empty?
48
+ raise NoFeaturesInTextError if ngrams.empty?
49
49
  @langprob = Array.new(@lang_list.length, 0.0)
50
50
 
51
51
  @n_trial.times do
@@ -68,6 +68,7 @@ module Langusta
68
68
  end
69
69
  end
70
70
 
71
+ # TODO: this looks like it's not referenced anywhere.
71
72
  def set_prior_map(prior_map)
72
73
  @prior_map = Array.new[@lang_list.length]
73
74
  sump = 0.0
@@ -99,7 +100,6 @@ module Langusta
99
100
  maxp
100
101
  end
101
102
 
102
- private
103
103
  def cleaning_text
104
104
  non_latin_count = latin_count = 0
105
105
  @text.each_char do |c|
@@ -1,6 +1,4 @@
1
1
  module Langusta
2
- class LangDetectException < StandardError; end
3
-
4
2
  class DetectorFactory
5
3
  attr_reader :word_lang_prob_map, :lang_list
6
4
 
@@ -14,7 +12,7 @@ module Langusta
14
12
  # @param [Fixnum] index at which the language profile is to be added.
15
13
  # @param [Fixnum] counts how many language profiles are to be added to this factory in total.
16
14
  def add_profile(profile, index, langsize)
17
- raise LangDetectException.new("duplicate the same language profile") if @lang_list.include?(profile.name)
15
+ raise DuplicateProfilesError.new(profile.name) if @lang_list.include?(profile.name)
18
16
  @lang_list << profile.name
19
17
  profile.freq.keys.each do |word|
20
18
  if not @word_lang_prob_map.has_key?(word)
@@ -39,7 +37,7 @@ module Langusta
39
37
 
40
38
  private
41
39
  def create_detector
42
- raise LangDetectException.new("need to load profiles") if @lang_list.length == 0
40
+ raise NoProfilesLoadedError if @lang_list.empty?
43
41
  detector = Detector.new(self)
44
42
  end
45
43
  end
@@ -49,4 +49,12 @@ class DetectorTest < Test::Unit::TestCase
49
49
  detector.append(UCS2String.new("\x30\x42\x30\x42\x30\x42\x30\x42\x00a"))
50
50
  assert_equal("jp", detector.detect())
51
51
  end
52
+
53
+ def test_exceptions
54
+ detector = @factory.create()
55
+ detector.append(UCS2String.new(''))
56
+ assert_raises(NoFeaturesInTextError) do
57
+ detector.detect()
58
+ end
59
+ end
52
60
  end
@@ -6,11 +6,23 @@ class DetectorFactoryTest < Test::Unit::TestCase
6
6
  factory = DetectorFactory.new
7
7
 
8
8
  factory.add_profile(profile, 0, 1)
9
- assert_raises(LangDetectException) do
10
- factory.add_profile(profile, 1, 1)
11
- end
12
9
 
13
10
  detector = factory.create(0.123)
14
11
  assert_equal(0.123, detector.alpha)
15
12
  end
13
+
14
+ def test_exceptions
15
+ profile = LangProfile.new
16
+ factory = DetectorFactory.new
17
+
18
+ assert_raises(NoProfilesLoadedError) do
19
+ factory.create()
20
+ end
21
+
22
+ factory.add_profile(profile, 0, 2)
23
+
24
+ assert_raises(DuplicateProfilesError) do
25
+ factory.add_profile(profile, 1, 2)
26
+ end
27
+ end
16
28
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langusta
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 0
10
- version: 0.1.0
9
+ - 1
10
+ version: 0.1.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jan Szumiec
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-08 00:00:00 +02:00
18
+ date: 2011-04-10 00:00:00 +02:00
19
19
  default_executable: langusta
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -124,7 +124,7 @@ dependencies:
124
124
  name: ruby-debug
125
125
  version_requirements: *id007
126
126
  prerelease: false
127
- description: Uses naive bayesian filter.
127
+ description: Highly accurate language detection library, uses naive bayesian filter.
128
128
  email: jan.szumiec@gmail.com
129
129
  executables:
130
130
  - langusta