peterc-whatlanguage 1.0.1 → 1.0.2

This diff shows the changes in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/README.txt CHANGED
@@ -7,9 +7,10 @@ whatlanguage
7
7
 
8
8
  Text language detection. Quick, fast, memory efficient, and all in pure Ruby. Uses Bloom filters for aforementioned speed and memory benefits.
9
9
 
10
+ Works with Dutch, English, Farsi, French, German, Swedish, Portuguese, Russian and Spanish out of the box.
11
+
10
12
  == FEATURES/PROBLEMS:
11
13
 
12
- * Only does French, English and Spanish out of the box. Very easy to train new languages though.
13
14
  * It can be made far more efficient at the comparison stage, but all in good time..! It still beats literal dictionary approaches.
14
15
  * No filter selection yet, you get 'em all loaded.
15
16
  * Tests are reasonably light.
@@ -45,6 +46,16 @@ Text language detection. Quick, fast, memory efficient, and all in pure Ruby. Us
45
46
 
46
47
  * None, minor libraries (BloominSimple and BitField) included with this release.
47
48
 
49
+ == INSTALLATION:
50
+
51
+ gem sources -a http://gems.github.com
52
+ sudo gem install peterc-whatlanguage
53
+
54
+ To test, go into irb, then:
55
+
56
+ require 'whatlanguage'
57
+ "Je suis un homme".language
58
+
48
59
  == LICENSE:
49
60
 
50
61
  (The MIT License)
data/build_filter.rb CHANGED
@@ -6,4 +6,4 @@
6
6
 
7
7
  require 'lib/whatlanguage'
8
8
  filter = WhatLanguage.filter_from_dictionary(ARGV[0])
9
- File.open(ARGV[1], 'w') { |f| f.write filter.dump }
9
+ File.open(ARGV[1], 'wb') { |f| f.write filter.dump }
@@ -6,7 +6,8 @@ languages_folder = File.join(File.dirname(__FILE__), "lang")
6
6
  wordlists_folder = File.join(File.dirname(__FILE__), "wordlists")
7
7
 
8
8
  Dir.entries(wordlists_folder).grep(/\w/).each do |lang|
9
+ next if lang == 'generators'
9
10
  puts "Doing #{lang}"
10
11
  filter = WhatLanguage.filter_from_dictionary(File.join(wordlists_folder, lang))
11
- File.open(File.join(languages_folder, lang + ".lang"), 'w') { |f| f.write filter.dump }
12
- end
12
+ File.open(File.join(languages_folder, lang + ".lang"), 'wb') { |f| f.write filter.dump }
13
+ end
data/lang/swedish.lang ADDED
Binary file
data/lib/whatlanguage.rb CHANGED
@@ -2,7 +2,7 @@ require File.join(File.dirname(__FILE__), 'bloominsimple')
2
2
  require 'digest/sha1'
3
3
 
4
4
  class WhatLanguage
5
- VERSION = '1.0.0'
5
+ VERSION = '1.0.2'
6
6
 
7
7
  HASHER = lambda { |item| Digest::SHA1.digest(item.downcase.strip).unpack("VV") }
8
8
 
@@ -10,7 +10,7 @@ class WhatLanguage
10
10
 
11
11
  @@data = {}
12
12
 
13
- def initialize(options)
13
+ def initialize(options = {})
14
14
  languages_folder = File.join(File.dirname(__FILE__), "..", "lang")
15
15
  Dir.entries(languages_folder).grep(/\.lang/).each do |lang|
16
16
  @@data[lang[/\w+/].to_sym] ||= BloominSimple.from_dump(File.read(File.join(languages_folder, lang)), &HASHER)
@@ -18,6 +18,10 @@ class TestWhatLanguage < Test::Unit::TestCase
18
18
  def test_spanish
19
19
  assert_equal :spanish, @wl.language("La palabra mezquita se usa en español para referirse a todo tipo de edificios dedicados.")
20
20
  end
21
+
22
+ def test_swedish
23
+ assert_equal :swedish, @wl.language("Den spanska räven rev en annan räv alldeles lagom.")
24
+ end
21
25
 
22
26
  def test_nothing
23
27
  assert_nil @wl.language("")
data/whatlanguage.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "whatlanguage"
3
- s.version = "1.0.1"
4
- s.date = "2008-08-22"
3
+ s.version = "1.0.2"
4
+ s.date = "2008-08-23"
5
5
  s.summary = "Natural language detection for text samples"
6
6
  s.email = "whatlanguage@peterc.org"
7
7
  s.homepage = "http://github.com/peterc/whatlanguage"
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
22
22
  "lang/portuguese.lang",
23
23
  "lang/russian.lang",
24
24
  "lang/spanish.lang",
25
+ "lang/swedish.lang",
25
26
  "lib/bitfield.rb",
26
27
  "lib/bloominsimple.rb",
27
28
  "lib/whatlanguage.rb",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: peterc-whatlanguage
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Cooper
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-08-22 00:00:00 -07:00
12
+ date: 2008-08-23 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -37,6 +37,7 @@ files:
37
37
  - lang/portuguese.lang
38
38
  - lang/russian.lang
39
39
  - lang/spanish.lang
40
+ - lang/swedish.lang
40
41
  - lib/bitfield.rb
41
42
  - lib/bloominsimple.rb
42
43
  - lib/whatlanguage.rb