what_you_say 0.4.4-x64-mingw-ucrt → 0.5.1-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +28 -3
- data/lib/what_you_say/3.1/what_you_say.so +0 -0
- data/lib/what_you_say/3.2/what_you_say.so +0 -0
- data/lib/what_you_say/lang.rb +1 -1
- data/lib/what_you_say/version.rb +2 -2
- data/lib/what_you_say.rb +5 -7
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a3eebfcd2cdf1377d60b3deb8e1c6aa0a8ea23aa22a30768223efcc2533e30b
|
4
|
+
data.tar.gz: 169c7347a3c39525a68217a5c5ba9365624e600e720a68078227446f8a7aa564
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bb173ee280b022d7c97ef25a5518b4e98a9ccd10cf96cf09ab536b3e0539ce77f7d1af3e9e62e58c1eb055ddd8799babce1eee145d86c58763631db87514df91
|
7
|
+
data.tar.gz: 3ca78fe5487913a953194abb384eed9f8aa55c7d6e3f7517f3e9ac0ce822e2d723d16afed73716dfe30db368b5eeb91075f94785826c223e8866c1f779222aa0
|
data/README.md
CHANGED
@@ -20,7 +20,7 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
|
-
The method to call is `
|
23
|
+
The method to call is `detect_language`.
|
24
24
|
|
25
25
|
Pass in the text whose language you want to detect:
|
26
26
|
|
@@ -29,7 +29,7 @@ require "what_you_say"
|
|
29
29
|
|
30
30
|
text = "Ĉu vi ne volas eklerni Esperanton? Bonvolu! Estas unu de la plej bonaj aferoj!"
|
31
31
|
|
32
|
-
result = WhatYouSay.
|
32
|
+
result = WhatYouSay.new.detect_language(text)
|
33
33
|
|
34
34
|
assert_equal("epo", result.lang.code)
|
35
35
|
assert_equal("esperanto", result.lang.eng_name)
|
@@ -39,10 +39,35 @@ You also have the opportunity to `inspect` some output:
|
|
39
39
|
|
40
40
|
```ruby
|
41
41
|
text = "Եվ ահա ես ստանում եմ մի զանգ պատահական տղայից"
|
42
|
-
WhatYouSay.
|
42
|
+
WhatYouSay.new.detect_language(text).inspect
|
43
43
|
#=> #<WhatYouSay::Lang code="hye" eng_name="armenian">
|
44
44
|
```
|
45
45
|
|
46
|
+
Not everything in life is perfect, and neither is this lib. Sometimes language detection will be wildly mistaken. You
|
47
|
+
can attempt to correct this by passing in an `allowlist` of supported languages:
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
text = "สวัสดี Rágis hello"
|
51
|
+
result = WhatYouSay.new.detect_language(text)
|
52
|
+
|
53
|
+
assert_equal("spanish", result.eng_name)
|
54
|
+
|
55
|
+
result = WhatYouSay.new(allowlist: ["English", "Thai"]).detect_language(text)
|
56
|
+
|
57
|
+
assert_equal("eng", result.code)
|
58
|
+
```
|
59
|
+
|
60
|
+
If a language truly cannot be detected, the `Unknown` language type is returned:
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
text = "日本語"
|
64
|
+
|
65
|
+
result = WhatYouSay.new(allowlist: ["English", "Thai"]).detect_language(text)
|
66
|
+
|
67
|
+
assert_equal("???", result.code)
|
68
|
+
assert_equal("unknown", result.eng_name)
|
69
|
+
```
|
70
|
+
|
46
71
|
## Development
|
47
72
|
|
48
73
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake compile test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
Binary file
|
Binary file
|
data/lib/what_you_say/lang.rb
CHANGED
data/lib/what_you_say/version.rb
CHANGED
data/lib/what_you_say.rb
CHANGED
@@ -9,13 +9,11 @@ if ENV.fetch("DEBUG", false)
|
|
9
9
|
require "debug"
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
raise TypeError, "text must be UTF-8 encoded; got #{text.encoding}!" unless text.encoding.name == "UTF-8"
|
12
|
+
class WhatYouSay
|
13
|
+
def detect_language(text)
|
14
|
+
raise TypeError, "text must be a String; got a #{text.class}!" unless text.is_a?(String)
|
15
|
+
raise TypeError, "text must be UTF-8 encoded; got #{text.encoding}!" unless text.encoding.name == "UTF-8"
|
17
16
|
|
18
|
-
|
19
|
-
end
|
17
|
+
detect_text(text)
|
20
18
|
end
|
21
19
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: what_you_say
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: x64-mingw-ucrt
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-04-
|
11
|
+
date: 2023-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -38,8 +38,8 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.2'
|
41
|
-
description: Natural language
|
42
|
-
Currently wraps the
|
41
|
+
description: Natural language detection with a focus on simplicity and performance.
|
42
|
+
Currently wraps the lingua-rs Rust crate.
|
43
43
|
email:
|
44
44
|
- gjtorikian@users.noreply.github.com
|
45
45
|
executables: []
|