scylla 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cc4efa8f7db7f8d6af1db0fb2ce7c204b6238fd7
4
+ data.tar.gz: 96723ba86150e5612a1b88ae7ef568bf4080c13a
5
+ SHA512:
6
+ metadata.gz: e99255de08664a38cc0f155f2d4789f04139dc2277d285aebfa887dcfa4956f9caec90dabfecc1316e6652795f0220c5d5a47decd57f2a4d15da3106c86ad4d1
7
+ data.tar.gz: b64726df9e01a455e475818e7dd0b9cbe285c056d04a06a7bb576273e8e772444c9c316728c0b0140a400dc2cbfbd7e8cc482065bde720575830ffe8767c3b1f
data/README.rdoc CHANGED
@@ -26,20 +26,11 @@ Multiple results for other possible languages:
26
26
 
27
27
  == Training
28
28
 
29
- You can train scylla in new languages by providing sample texts in different languages. The default set is located in the 'source_texts' folder in the gem directory. Add new .txt files to this directory named according to the language i.e. a text file full of Hebrew text should be called 'hebrew.txt'. At least 500 lines of text recommended. Then, in the gem folder, run this:
29
+ Training is done by fetching data from wikipedia. To fetch latest articles (The country name in the language
30
+ in question, eg. "England" for English or "日本" for Japanese), run
30
31
 
31
32
  rake scylla:train
32
33
 
33
- If you want to store texts in your own folder, you can specify that to the rake task.
34
- WARNING: specifying a different folder deletes all language support for files located in the default directory if they are not copied over.
35
-
36
- rake scylla:train[/Users/hash/mytextdir]
37
- "Creating language map for /Users/hash/mytextdir/english.txt"
38
- "Creating language map for /Users/hash/mytextdir/kannada.txt"
39
- .
40
- .
41
- etc
42
-
43
34
  == Contributing to scylla
44
35
 
45
36
  * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
  require 'helper'
3
3
 
4
- class ClassifierTest < Test::Unit::TestCase
4
+ class ClassifierTest < Minitest::Test
5
5
 
6
6
  context "#classify" do
7
7
  setup do
@@ -1,6 +1,6 @@
1
1
  require 'helper'
2
2
 
3
- class GeneratorTest < Test::Unit::TestCase
3
+ class GeneratorTest < Minitest::Test
4
4
  context "create_lm ngrams" do
5
5
  setup do
6
6
  Scylla::Loader.set_dir(File.join("test","fixtures","lms"))
data/test/helper.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'rubygems'
2
- require 'ruby-debug'
3
2
  require 'bundler'
4
- require 'test/unit'
3
+ require "minitest/autorun"
4
+ # require 'test/unit'
5
5
  require 'shoulda'
6
6
  require 'scylla'
7
7
  require 'sanitize'
@@ -18,5 +18,5 @@ end
18
18
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
19
19
  $LOAD_PATH.unshift(File.dirname(__FILE__))
20
20
 
21
- class Test::Unit::TestCase
21
+ class Minitest::Test
22
22
  end
@@ -1,6 +1,6 @@
1
1
  require 'helper'
2
2
 
3
- class LanguageTest < Test::Unit::TestCase
3
+ class LanguageTest < Minitest::Test
4
4
  context "language detection" do
5
5
  setup do
6
6
  @sentences = Hash.new
data/test/loader_test.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'helper'
2
2
 
3
- class LoaderTest < Test::Unit::TestCase
3
+ class LoaderTest < Minitest::Test
4
4
  context "#languages" do
5
5
  setup do
6
6
  Scylla::Loader.clear
data/test/scylla_test.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'helper'
2
2
 
3
- class ScyllaTest < Test::Unit::TestCase
3
+ class ScyllaTest < Minitest::Test
4
4
  context "String methods" do
5
5
  setup do
6
6
  Scylla::Loader.set_dir(File.join("test","fixtures","lms"))
@@ -12,8 +12,8 @@ class ScyllaTest < Test::Unit::TestCase
12
12
  end
13
13
 
14
14
  should "load language results for strings" do
15
- assert_not_nil @language
16
- assert_not_nil @languages
15
+ assert @language
16
+ assert @languages
17
17
  assert_equal String, @language.class
18
18
  assert_equal Array, @languages.class
19
19
  assert_equal "english", @language
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scylla
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
5
- prerelease:
4
+ version: 1.0.8
6
5
  platform: ruby
7
6
  authors:
8
7
  - Ashwin Hegde
@@ -14,35 +13,45 @@ dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - ">="
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - ">="
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: sanitize
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - ">="
36
32
  - !ruby/object:Gem::Version
37
33
  version: '0'
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - ">="
44
39
  - !ruby/object:Gem::Version
45
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: unicode
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.4.4
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.4.4
46
55
  description: Allows for text categorization by guessing the language of a given text
47
56
  using n-grams
48
57
  email: ahegde@zendesk.com
@@ -53,6 +62,10 @@ extra_rdoc_files:
53
62
  - LICENSE.txt
54
63
  - README.rdoc
55
64
  files:
65
+ - LICENSE.txt
66
+ - README.rdoc
67
+ - bin/scylla
68
+ - lib/scylla.rb
56
69
  - lib/scylla/classifier.rb
57
70
  - lib/scylla/generator.rb
58
71
  - lib/scylla/lms/arabic.lm
@@ -95,9 +108,6 @@ files:
95
108
  - lib/scylla/resources.rb
96
109
  - lib/scylla/string.rb
97
110
  - lib/scylla/tasks.rb
98
- - lib/scylla.rb
99
- - README.rdoc
100
- - LICENSE.txt
101
111
  - test/classifier_test.rb
102
112
  - test/fixtures/lms/arabic.lm
103
113
  - test/fixtures/lms/bulgarian.lm
@@ -157,31 +167,29 @@ files:
157
167
  - test/language_test.rb
158
168
  - test/loader_test.rb
159
169
  - test/scylla_test.rb
160
- - bin/scylla
161
170
  homepage: http://github.com/hashwin/scylla
162
171
  licenses:
163
172
  - MIT
173
+ metadata: {}
164
174
  post_install_message:
165
175
  rdoc_options: []
166
176
  require_paths:
167
177
  - lib
168
178
  required_ruby_version: !ruby/object:Gem::Requirement
169
- none: false
170
179
  requirements:
171
- - - ! '>='
180
+ - - ">="
172
181
  - !ruby/object:Gem::Version
173
182
  version: '0'
174
183
  required_rubygems_version: !ruby/object:Gem::Requirement
175
- none: false
176
184
  requirements:
177
- - - ! '>='
185
+ - - ">="
178
186
  - !ruby/object:Gem::Version
179
187
  version: '0'
180
188
  requirements: []
181
189
  rubyforge_project:
182
- rubygems_version: 1.8.25
190
+ rubygems_version: 2.5.2
183
191
  signing_key:
184
- specification_version: 3
192
+ specification_version: 4
185
193
  summary: Ruby port of Textcat language guesser
186
194
  test_files:
187
195
  - test/classifier_test.rb