ruby_ngrams_language_detector 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/spec/english.txt ADDED
@@ -0,0 +1 @@
1
+ this is a test of the Emergency text categorizing system.
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+ describe LanguageDetector do
3
+ describe "Test detect_language() method" do
4
+ before do
5
+ LanguageDetector::Detector.train
6
+ @language_detector = LanguageDetector::Detector.new
7
+ end
8
+
9
+ it "Test detect spanish" do
10
+ @language_detector.detect_language("spec/spanish.txt").should eql "es"
11
+ end
12
+
13
+ it "Test detect english" do
14
+ @language_detector.detect_language("spec/english.txt").should eql "en"
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,104 @@
1
+ require 'spec_helper'
2
+ describe LanguageDetector::Profile do
3
+ describe "Test is_valid_character() method" do
4
+ before do
5
+ @profile = LanguageDetector::Profile.new("test")
6
+ end
7
+
8
+ it "Test '.' is not a valid character" do
9
+ @profile.is_valid_character?(?.).should be_false
10
+ end
11
+
12
+ it "Test ',' is not a valid character" do
13
+ @profile.is_valid_character?(?,).should be_false
14
+ end
15
+
16
+ it "Test ':' is not a valid character" do
17
+ @profile.is_valid_character?(?:).should be_false
18
+ end
19
+
20
+ it "Test ';' is not a valid character" do
21
+ @profile.is_valid_character?(?;).should be_false
22
+ end
23
+
24
+ it "Test 'A' is not a valid character" do
25
+ @profile.is_valid_character?(?A).should be_false
26
+ end
27
+
28
+ it "Test 'a' is a valid character" do
29
+ @profile.is_valid_character?(?a).should be_true
30
+ end
31
+ end
32
+
33
+ describe "Test tokenize() method" do
34
+ before do
35
+ @profile = LanguageDetector::Profile.new("test")
36
+ end
37
+
38
+ it "Test tokenize() drops punctuation, digits and whitespace and lowercases words" do
39
+ @profile.tokenize("this is; ,+_ A \t 123 test:").should match_array(["this", "is", "a", "test"])
40
+ end
41
+ end
42
+
43
+ describe "Test count_ngram() method" do
44
+ before do
45
+ @profile = LanguageDetector::Profile.new("test")
46
+ end
47
+
48
+ it "Test 1" do
49
+ @profile.count_ngram('words', 1, {}).should include("w"=>1, "o"=>1, "r"=>1, "d"=>1, "s"=>1)
50
+ end
51
+
52
+ it "Test 2" do
53
+ @profile.count_ngram('words', 2, {}).should include("wo"=>1, "or"=>1, "rd"=>1, "ds"=>1, "_w" => 1, "s_" => 1)
54
+ end
55
+
56
+ it "Test 3" do
57
+ @profile.count_ngram('words', 3, {}).should include("wor"=>1, "ord"=>1, "rds"=>1, "_wo" => 1, "ds_" => 1, "s__" => 1)
58
+ end
59
+
60
+ it "Test 4" do
61
+ @profile.count_ngram('words', 4, {}).should include("word"=>1, "ords"=>1, "_wor" => 1, "rds_" => 1, "ds__" => 1, "s___" => 1)
62
+ end
63
+
64
+ it "Test 5" do
65
+ @profile.count_ngram('words', 5, {}).should include("words"=>1, "_word" => 1, "ords_" => 1, "rds__" => 1, "ds___" => 1, "s____" => 1)
66
+ end
67
+
68
+ it "Test 6" do
69
+ @profile.count_ngram('words', 6, {}).should include()
70
+ end
71
+ end
72
+
73
+ describe "Test init_with_string() method" do
74
+ before do
75
+ @profile = LanguageDetector::Profile.new("test")
76
+ end
77
+
78
+ it "Test 1" do
79
+ @profile.init_with_string("this is; ,+_ A \t 123 test:")
80
+ @profile.ngrams.should include("_t"=>1, "s_"=>2, "is"=>3, "_i"=>4, "th"=>5, "_th"=>6, "thi"=>7, "his"=>8, "is_"=>9, "s__"=>10, "_thi"=>11, "this"=>12, "his_"=>13, "is__"=>14, "s___"=>15, "hi"=>16, "te"=>17, "es"=>18, "st"=>19, "t_"=>20, "_te"=>21, "tes"=>22, "est"=>23, "st_"=>24, "t__"=>25, "_tes"=>26, "test"=>27, "est_"=>28, "st__"=>29, "t___"=>30)
81
+ end
82
+ end
83
+
84
+ describe "Test compute_distance() method" do
85
+ before do
86
+ @profile1 = LanguageDetector::Profile.new("test")
87
+ @profile1.init_with_string("this is ,+_ A \t 123 test")
88
+
89
+ @profile2 = LanguageDetector::Profile.new("test")
90
+ @profile2.init_with_string("this is ,+_ A \t 123 test")
91
+
92
+ @profile3 = LanguageDetector::Profile.new("test")
93
+ @profile3.init_with_string("xxxx")
94
+ end
95
+
96
+ it "Test 1" do
97
+ @profile1.compute_distance(@profile2).should eql 0
98
+ end
99
+
100
+ it "Test 2" do
101
+ @profile1.compute_distance(@profile3).should eql 24000
102
+ end
103
+ end
104
+ end
data/spec/spanish.txt ADDED
@@ -0,0 +1 @@
1
+ para poner este importante proyecto en
@@ -0,0 +1,3 @@
1
+ require_relative '../lib/language_detector/profile'
2
+ require_relative '../lib/language_detector'
3
+ require 'yaml'
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby_ngrams_language_detector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - cexposito
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-02-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.6'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '2.6'
30
+ description: ngram based language detector written in ruby
31
+ email:
32
+ - carlosexposito68@gmail.com
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - language_detector.gemspec
43
+ - lib/language_detector.rb
44
+ - lib/language_detector/profile.rb
45
+ - lib/language_detector/training_data/english.txt
46
+ - lib/language_detector/training_data/spanish.txt
47
+ - lib/language_detector/version.rb
48
+ - lib/model.yml
49
+ - spec/english.txt
50
+ - spec/language_detector_spec.rb
51
+ - spec/profile_spec.rb
52
+ - spec/spanish.txt
53
+ - spec/spec_helper.rb
54
+ homepage: ''
55
+ licenses: []
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ! '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubyforge_project:
74
+ rubygems_version: 1.8.25
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: ngram based language detector
78
+ test_files:
79
+ - spec/english.txt
80
+ - spec/language_detector_spec.rb
81
+ - spec/profile_spec.rb
82
+ - spec/spanish.txt
83
+ - spec/spec_helper.rb