opener-language-identifier 4.2.1 → 4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/core/target/LanguageDetection-1.0.0.jar +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/Detector.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/DetectorFactory.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/ErrorCode.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/LangProfile.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/Messages.class +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/util/NGram.class +0 -0
- data/core/target/opennlp/langdetect-183.bin +0 -0
- data/core/target/opennlp/opennlp-tools-1.8.4.jar +0 -0
- data/lib/opener/language_identifier.rb +14 -16
- data/lib/opener/language_identifier/backend/detect_language_com.rb +56 -0
- data/lib/opener/language_identifier/backend/language_detection.rb +142 -0
- data/lib/opener/language_identifier/backend/opennlp.rb +143 -0
- data/lib/opener/language_identifier/cli.rb +3 -10
- data/lib/opener/language_identifier/detector.rb +21 -138
- data/lib/opener/language_identifier/version.rb +3 -1
- data/opener-language-identifier.gemspec +5 -2
- metadata +180 -144
- data/core/target/LanguageDetection-0.0.1.jar +0 -0
- data/core/target/classes/com/cybozu/labs/langdetect/af +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: eebf7674631ee01c80ae1285ab658133ee9da0e35a854aa8c1855f52e04cf27d
|
4
|
+
data.tar.gz: 05f72d5366f9cfdfaf87d71004a977778284091d3ffc0318f725080126f9a6ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab868922fd23754e4a4575372f02f39213a13ddd11fdbdcda8541dc4a63130efb6b2db62f74141915994e0422fe2eb9f03e9f00a0f046a340ae23ae7b50737eb
|
7
|
+
data.tar.gz: 42d395b265355acf8b15b0d4cb60663c0d9a43dc686c8afb91eb2ae94b554914a0033b2306ce56cc5fd4acc824999526c99680e3f89eb3ba9da127ad6f7ad95a
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,13 +1,19 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'slop'
|
3
3
|
require 'builder'
|
4
|
+
require 'pp'
|
4
5
|
|
5
|
-
|
6
|
+
require 'detect_language'
|
6
7
|
|
7
8
|
require_relative 'language_identifier/version'
|
8
9
|
require_relative 'language_identifier/kaf_builder'
|
9
10
|
require_relative 'language_identifier/cli'
|
10
|
-
require_relative 'language_identifier/detector
|
11
|
+
require_relative 'language_identifier/detector'
|
12
|
+
require_relative 'language_identifier/backend/detect_language_com'
|
13
|
+
if RUBY_ENGINE == 'jruby'
|
14
|
+
require_relative 'language_identifier/backend/language_detection'
|
15
|
+
require_relative 'language_identifier/backend/opennlp'
|
16
|
+
end
|
11
17
|
|
12
18
|
module Opener
|
13
19
|
##
|
@@ -26,9 +32,8 @@ module Opener
|
|
26
32
|
# @return [Hash]
|
27
33
|
#
|
28
34
|
DEFAULT_OPTIONS = {
|
29
|
-
:
|
30
|
-
:
|
31
|
-
:probs => false
|
35
|
+
args: [],
|
36
|
+
kaf: true,
|
32
37
|
}.freeze
|
33
38
|
|
34
39
|
##
|
@@ -40,12 +45,9 @@ module Opener
|
|
40
45
|
# @option options [TrueClass|FalseClass] :kaf When set to `true` the
|
41
46
|
# results will be displayed as KAF.
|
42
47
|
#
|
43
|
-
# @option options [TrueClass|FalseClass] :probs Wen set the probabilities
|
44
|
-
# are returned instead of the language/KAF.
|
45
|
-
#
|
46
48
|
def initialize(options = {})
|
47
49
|
@options = DEFAULT_OPTIONS.merge(options)
|
48
|
-
@detector = Detector.new
|
50
|
+
@detector = Detector.new ENV['BACKEND'], ENV['FALLBACK']
|
49
51
|
end
|
50
52
|
|
51
53
|
##
|
@@ -55,13 +57,9 @@ module Opener
|
|
55
57
|
# @param [String] input The text of which to detect the language.
|
56
58
|
# @return [Array]
|
57
59
|
#
|
58
|
-
def run
|
59
|
-
|
60
|
-
|
61
|
-
else
|
62
|
-
output = @detector.detect(input)
|
63
|
-
output = build_kaf(input, output) if options[:kaf]
|
64
|
-
end
|
60
|
+
def run input, params = {}
|
61
|
+
output = @detector.detect input
|
62
|
+
output = build_kaf input, output if options[:kaf]
|
65
63
|
|
66
64
|
return output
|
67
65
|
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Opener
|
2
|
+
class LanguageIdentifier
|
3
|
+
module Backend
|
4
|
+
class DetectLanguageCom
|
5
|
+
|
6
|
+
##
|
7
|
+
# Unknown or languages that use a different code
|
8
|
+
#
|
9
|
+
CODE_MAP = {
|
10
|
+
bug: nil,
|
11
|
+
ceb: :tl,
|
12
|
+
chr: nil,
|
13
|
+
crs: nil,
|
14
|
+
egy: nil,
|
15
|
+
got: nil,
|
16
|
+
haw: nil,
|
17
|
+
hmn: nil,
|
18
|
+
iw: :he,
|
19
|
+
jw: :jv,
|
20
|
+
kha: nil,
|
21
|
+
lif: :li,
|
22
|
+
mfe: nil,
|
23
|
+
nso: nil,
|
24
|
+
sco: nil,
|
25
|
+
syr: nil,
|
26
|
+
tlh: nil,
|
27
|
+
war: :tl,
|
28
|
+
zh: :'zh-cn',
|
29
|
+
'zh-Hant': :'zh-cn',
|
30
|
+
}
|
31
|
+
|
32
|
+
def initialize
|
33
|
+
DetectLanguage.configure do |config|
|
34
|
+
config.secure = true
|
35
|
+
config.api_key = ENV['DETECT_LANGUAGE_TOKEN']
|
36
|
+
raise 'no detectlanguage token specified' if config.api_key.nil?
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def detect input
|
41
|
+
code = DetectLanguage.simple_detect input
|
42
|
+
return 'unknown' unless code
|
43
|
+
|
44
|
+
mapped_code = CODE_MAP[code.to_sym]
|
45
|
+
return mapped_code.to_s if mapped_code
|
46
|
+
|
47
|
+
code
|
48
|
+
|
49
|
+
rescue
|
50
|
+
retry
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require_relative '../../../../core/target/LanguageDetection-1.0.0.jar'
|
2
|
+
|
3
|
+
module Opener
|
4
|
+
class LanguageIdentifier
|
5
|
+
module Backend
|
6
|
+
class LanguageDetection
|
7
|
+
|
8
|
+
##
|
9
|
+
# Path to the directory containing the default profiles.
|
10
|
+
#
|
11
|
+
# @return [String]
|
12
|
+
#
|
13
|
+
DEFAULT_PROFILES_PATH = File.expand_path(
|
14
|
+
'../../../../../core/target/classes/profiles',
|
15
|
+
__FILE__
|
16
|
+
)
|
17
|
+
|
18
|
+
##
|
19
|
+
# Path to the directory containing the default short profiles.
|
20
|
+
#
|
21
|
+
# @return [String]
|
22
|
+
#
|
23
|
+
DEFAULT_SHORT_PROFILES_PATH = File.expand_path(
|
24
|
+
'../../../../../core/target/classes/short_profiles',
|
25
|
+
__FILE__
|
26
|
+
)
|
27
|
+
|
28
|
+
##
|
29
|
+
# Prioritize OpeNER languages over the rest. Languages not covered by this
|
30
|
+
# list are automatically given a default priority.
|
31
|
+
#
|
32
|
+
# @return [Hash]
|
33
|
+
#
|
34
|
+
PRIORITIES = {
|
35
|
+
'en' => 1.0,
|
36
|
+
'es' => 0.9,
|
37
|
+
'it' => 0.9,
|
38
|
+
'fr' => 0.9,
|
39
|
+
'de' => 0.9,
|
40
|
+
'nl' => 0.9,
|
41
|
+
|
42
|
+
# These languages are disabled (for the time being) due to conflicting
|
43
|
+
# with other (OpeNER) languages too often.
|
44
|
+
'af' => 0.0, # conflicts with Dutch
|
45
|
+
}
|
46
|
+
|
47
|
+
##
|
48
|
+
# The default priority for non OpeNER languages.
|
49
|
+
#
|
50
|
+
# @return [Float]
|
51
|
+
#
|
52
|
+
DEFAULT_PRIORITY = 0.5
|
53
|
+
|
54
|
+
##
|
55
|
+
# The amount of characters after which the detector should switch to using
|
56
|
+
# the longer profiles set.
|
57
|
+
#
|
58
|
+
# @return [Fixnum]
|
59
|
+
#
|
60
|
+
SHORT_THRESHOLD = 15
|
61
|
+
|
62
|
+
def initialize
|
63
|
+
@factory = com.cybozu.labs.langdetect.DetectorFactory.new
|
64
|
+
end
|
65
|
+
|
66
|
+
def new_detector input
|
67
|
+
@factory.load_profile determine_profiles input
|
68
|
+
@factory.set_seed 1
|
69
|
+
|
70
|
+
priorities = build_priorities input, @factory.langlist
|
71
|
+
detector = com.cybozu.labs.langdetect.Detector.new @factory
|
72
|
+
|
73
|
+
detector.set_prior_map priorities
|
74
|
+
detector.append input.downcase
|
75
|
+
detector
|
76
|
+
end
|
77
|
+
|
78
|
+
##
|
79
|
+
# @return [String]
|
80
|
+
#
|
81
|
+
def detect input
|
82
|
+
detector = new_detector input
|
83
|
+
detector.detect
|
84
|
+
|
85
|
+
# The core Java code raise an exception when it can't detect a language.
|
86
|
+
# Since this isn't actually something fatal we'll capture this and return
|
87
|
+
# "unknown" instead.
|
88
|
+
rescue com.cybozu.labs.langdetect.LangDetectException
|
89
|
+
return 'unknown'
|
90
|
+
end
|
91
|
+
|
92
|
+
protected
|
93
|
+
|
94
|
+
##
|
95
|
+
# Builds a Java Hash mapping the priorities for all OpeNER and non OpeNER
|
96
|
+
# languages.
|
97
|
+
#
|
98
|
+
# If the input size is smaller than the short profiles threshold non
|
99
|
+
# OpeNER languages are _disabled_. This is to ensure that these languages
|
100
|
+
# are detected properly when analysing only 1-2 words.
|
101
|
+
#
|
102
|
+
# @param [String] input
|
103
|
+
# @param [Array<String>] languages
|
104
|
+
# @return [java.util.HashMap]
|
105
|
+
#
|
106
|
+
def build_priorities input, languages
|
107
|
+
priorities = java.util.HashMap.new
|
108
|
+
priority = if short_input? input then 0.0 else DEFAULT_PRIORITY end
|
109
|
+
|
110
|
+
PRIORITIES.each do |lang, val|
|
111
|
+
priorities.put(lang, val)
|
112
|
+
end
|
113
|
+
|
114
|
+
languages.each do |language|
|
115
|
+
unless priorities.contains_key(language)
|
116
|
+
priorities.put(language, priority)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
priorities
|
121
|
+
end
|
122
|
+
|
123
|
+
##
|
124
|
+
# @param [String] input
|
125
|
+
# @return [String]
|
126
|
+
#
|
127
|
+
def determine_profiles input
|
128
|
+
if short_input? input then DEFAULT_SHORT_PROFILES_PATH else DEFAULT_PROFILES_PATH end
|
129
|
+
end
|
130
|
+
|
131
|
+
##
|
132
|
+
# @param [String] input
|
133
|
+
# @return [TrueClass|FalseClass]
|
134
|
+
#
|
135
|
+
def short_input? input
|
136
|
+
input.length <= SHORT_THRESHOLD
|
137
|
+
end
|
138
|
+
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
if ENV.values_at('BACKEND', 'FALLBACK').include? 'Opennlp'
|
2
|
+
require_relative '../../../../core/target/opennlp/opennlp-tools-1.8.4.jar'
|
3
|
+
end
|
4
|
+
|
5
|
+
module Opener
|
6
|
+
class LanguageIdentifier
|
7
|
+
module Backend
|
8
|
+
class Opennlp
|
9
|
+
|
10
|
+
include_package 'opennlp.tools.langdetect'
|
11
|
+
|
12
|
+
MODEL_FILE = File.expand_path '../../../../core/target/opennlp/langdetect-183.bin', File.dirname(__FILE__)
|
13
|
+
ISOCODE_MAP = {
|
14
|
+
afr: :nl,
|
15
|
+
ara: :ar,
|
16
|
+
aze: :az,
|
17
|
+
bak: :ba,
|
18
|
+
bel: :be,
|
19
|
+
ben: :bn,
|
20
|
+
bos: :bs,
|
21
|
+
bre: :br,
|
22
|
+
bul: :bg,
|
23
|
+
cat: :ca,
|
24
|
+
ces: :cs,
|
25
|
+
che: :ce,
|
26
|
+
cmn: :'zh-cn',
|
27
|
+
nan: :'zh-cn',
|
28
|
+
cym: :cy,
|
29
|
+
dan: :da,
|
30
|
+
deu: :de,
|
31
|
+
gsw: :de,
|
32
|
+
nds: :de,
|
33
|
+
ell: :el,
|
34
|
+
eng: :en,
|
35
|
+
epo: :eo,
|
36
|
+
est: :et,
|
37
|
+
ekk: :et,
|
38
|
+
eus: :eu,
|
39
|
+
fao: :fo,
|
40
|
+
fin: :fi,
|
41
|
+
fra: :fr,
|
42
|
+
fry: :fy,
|
43
|
+
gle: :ga,
|
44
|
+
glg: :gl,
|
45
|
+
guj: :gu,
|
46
|
+
heb: :he,
|
47
|
+
hin: :hi,
|
48
|
+
hrv: :hr,
|
49
|
+
hun: :hu,
|
50
|
+
hye: :hy,
|
51
|
+
ind: :id,
|
52
|
+
isl: :is,
|
53
|
+
ita: :it,
|
54
|
+
jav: :jv,
|
55
|
+
jpn: :ja,
|
56
|
+
kan: :kn,
|
57
|
+
kat: :ka,
|
58
|
+
kaz: :kk,
|
59
|
+
kir: :ky,
|
60
|
+
kor: :ko,
|
61
|
+
lat: :la,
|
62
|
+
lim: :li,
|
63
|
+
lit: :lt,
|
64
|
+
ltz: :lb,
|
65
|
+
lav: :lv,
|
66
|
+
lvs: :lv,
|
67
|
+
mal: :ml,
|
68
|
+
mar: :mr,
|
69
|
+
mkd: :mk,
|
70
|
+
mlt: :mt,
|
71
|
+
mon: :mn,
|
72
|
+
mri: :mi,
|
73
|
+
min: :ms,
|
74
|
+
msa: :ms,
|
75
|
+
nep: :ne,
|
76
|
+
nld: :nl,
|
77
|
+
nno: :nn,
|
78
|
+
nob: :no,
|
79
|
+
oci: :oc,
|
80
|
+
pan: :pa,
|
81
|
+
pnb: :pa,
|
82
|
+
plt: :mg,
|
83
|
+
fas: :fa,
|
84
|
+
pes: :fa,
|
85
|
+
pol: :pl,
|
86
|
+
por: :pt,
|
87
|
+
pus: :ps,
|
88
|
+
ron: :ro,
|
89
|
+
rus: :ru,
|
90
|
+
san: :sa,
|
91
|
+
sin: :si,
|
92
|
+
slk: :sk,
|
93
|
+
slv: :sl,
|
94
|
+
som: :so,
|
95
|
+
ast: :es,
|
96
|
+
spa: :es,
|
97
|
+
sqi: :sq,
|
98
|
+
srp: :sr,
|
99
|
+
sun: :su,
|
100
|
+
swa: :sw,
|
101
|
+
swe: :sv,
|
102
|
+
tam: :ta,
|
103
|
+
tat: :tt,
|
104
|
+
tel: :te,
|
105
|
+
tgk: :tg,
|
106
|
+
ceb: :tl,
|
107
|
+
tgl: :tl,
|
108
|
+
war: :tl,
|
109
|
+
tha: :th,
|
110
|
+
tur: :tr,
|
111
|
+
ukr: :uk,
|
112
|
+
urd: :ur,
|
113
|
+
uzb: :uz,
|
114
|
+
vie: :vi,
|
115
|
+
vol: :vo,
|
116
|
+
zul: :zu,
|
117
|
+
}
|
118
|
+
|
119
|
+
def initialize
|
120
|
+
model_file = java.io.File.new MODEL_FILE
|
121
|
+
input = java.io.FileInputStream.new model_file
|
122
|
+
@model = LanguageDetectorModel.new input
|
123
|
+
@detector = LanguageDetectorME.new @model
|
124
|
+
end
|
125
|
+
|
126
|
+
def detect input
|
127
|
+
language = @detector.predictLanguage input
|
128
|
+
return 'unknown' unless language
|
129
|
+
|
130
|
+
code = ISOCODE_MAP[language.getLang.to_sym]
|
131
|
+
return 'unknown' unless code
|
132
|
+
|
133
|
+
code.to_s
|
134
|
+
|
135
|
+
rescue
|
136
|
+
return 'unknown'
|
137
|
+
end
|
138
|
+
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
@@ -48,25 +48,18 @@ Example:
|
|
48
48
|
end
|
49
49
|
|
50
50
|
on :'no-kaf', 'Disables KAF output'
|
51
|
-
on :p, :probs, 'Displays probabilities instead of a language code'
|
52
51
|
|
53
52
|
run do |opts, args|
|
54
53
|
enable_kaf = true
|
55
|
-
enable_probs = false
|
56
54
|
|
57
55
|
if opts[:'no-kaf']
|
58
56
|
enable_kaf = false
|
59
57
|
end
|
60
58
|
|
61
|
-
if opts[:probs]
|
62
|
-
enable_kf = false
|
63
|
-
enable_probs = true
|
64
|
-
end
|
65
|
-
|
66
59
|
identifier = LanguageIdentifier.new(
|
67
|
-
:
|
68
|
-
:
|
69
|
-
:
|
60
|
+
args: args,
|
61
|
+
kaf: enable_kaf,
|
62
|
+
probs: false,
|
70
63
|
)
|
71
64
|
|
72
65
|
input = STDIN.tty? ? nil : STDIN.read
|
@@ -6,159 +6,42 @@ module Opener
|
|
6
6
|
# assigning priorities to languages, etc.
|
7
7
|
#
|
8
8
|
class Detector
|
9
|
-
attr_reader :profiles_path, :short_profiles_path
|
10
9
|
|
11
|
-
|
12
|
-
# Path to the directory containing the default profiles.
|
13
|
-
#
|
14
|
-
# @return [String]
|
15
|
-
#
|
16
|
-
DEFAULT_PROFILES_PATH = File.expand_path(
|
17
|
-
'../../../../core/target/classes/profiles',
|
18
|
-
__FILE__
|
19
|
-
)
|
20
|
-
|
21
|
-
##
|
22
|
-
# Path to the directory containing the default short profiles.
|
23
|
-
#
|
24
|
-
# @return [String]
|
25
|
-
#
|
26
|
-
DEFAULT_SHORT_PROFILES_PATH = File.expand_path(
|
27
|
-
'../../../../core/target/classes/short_profiles',
|
28
|
-
__FILE__
|
29
|
-
)
|
30
|
-
|
31
|
-
##
|
32
|
-
# The amount of characters after which the detector should switch to using
|
33
|
-
# the longer profiles set.
|
34
|
-
#
|
35
|
-
# @return [Fixnum]
|
36
|
-
#
|
37
|
-
SHORT_THRESHOLD = 15
|
38
|
-
|
39
|
-
##
|
40
|
-
# Prioritize OpeNER languages over the rest. Languages not covered by this
|
41
|
-
# list are automatically given a default priority.
|
42
|
-
#
|
43
|
-
# @return [Hash]
|
44
|
-
#
|
45
|
-
PRIORITIES = {
|
46
|
-
'en' => 1.0,
|
47
|
-
'es' => 0.9,
|
48
|
-
'it' => 0.9,
|
49
|
-
'fr' => 0.9,
|
50
|
-
'de' => 0.9,
|
51
|
-
'nl' => 0.9,
|
52
|
-
|
53
|
-
# These languages are disabled (for the time being) due to conflicting
|
54
|
-
# with other (OpeNER) languages too often.
|
55
|
-
'af' => 0.0, # conflicts with Dutch
|
56
|
-
}
|
57
|
-
|
58
|
-
##
|
59
|
-
# The default priority for non OpeNER languages.
|
60
|
-
#
|
61
|
-
# @return [Float]
|
62
|
-
#
|
63
|
-
DEFAULT_PRIORITY = 0.5
|
10
|
+
attr_reader :backend
|
64
11
|
|
65
12
|
##
|
66
13
|
# @param [Hash] options
|
67
14
|
#
|
68
|
-
# @option options [String] :profiles_path
|
69
|
-
# @option options [String] :short_profiles_path
|
70
15
|
#
|
71
|
-
def initialize
|
72
|
-
|
73
|
-
|
74
|
-
|
16
|
+
def initialize backend = nil, fallback = nil
|
17
|
+
klass = Backend.const_get backend.to_sym if backend
|
18
|
+
klass ||= Backend::LanguageDetection
|
19
|
+
@backend = klass.new
|
20
|
+
|
21
|
+
klass = Backend.const_get fallback.to_sym if fallback
|
22
|
+
@fallback = klass.new if klass
|
75
23
|
|
76
|
-
@
|
77
|
-
@short_profiles_path ||= DEFAULT_SHORT_PROFILES_PATH
|
24
|
+
@timeout = ENV['TIMEOUT']&.to_i
|
78
25
|
end
|
79
26
|
|
80
27
|
##
|
81
28
|
# @return [String]
|
82
29
|
#
|
83
30
|
def detect(input)
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
#
|
90
|
-
def probabilities(input)
|
91
|
-
return new_detector(input).get_probabilities.to_array
|
92
|
-
end
|
93
|
-
|
94
|
-
##
|
95
|
-
# Returns a new detector with the profiles set based on the input.
|
96
|
-
#
|
97
|
-
# This method analyses a lowercased version of the input as this yields
|
98
|
-
# better results for short text.
|
99
|
-
#
|
100
|
-
# @param [String] input
|
101
|
-
# @return [CybozuDetector]
|
102
|
-
#
|
103
|
-
def new_detector(input)
|
104
|
-
factory = com.cybozu.labs.langdetect.DetectorFactory.new
|
105
|
-
|
106
|
-
factory.load_profile(determine_profiles(input))
|
107
|
-
factory.set_seed(1)
|
108
|
-
|
109
|
-
priorities = build_priorities(input, factory.langlist)
|
110
|
-
detector = com.cybozu.labs.langdetect.Detector.new(factory)
|
111
|
-
|
112
|
-
detector.set_prior_map(priorities)
|
113
|
-
detector.append(input.downcase)
|
114
|
-
|
115
|
-
return detector
|
31
|
+
backend_detect @backend, input
|
32
|
+
rescue
|
33
|
+
raise unless @fallback
|
34
|
+
puts 'Using fallback backend' if ENV['DEBUG']
|
35
|
+
backend_detect @fallback, input
|
116
36
|
end
|
117
37
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
# If the input size is smaller than the short profiles threshold non
|
123
|
-
# OpeNER languages are _disabled_. This is to ensure that these languages
|
124
|
-
# are detected properly when analysing only 1-2 words.
|
125
|
-
#
|
126
|
-
# @param [String] input
|
127
|
-
# @param [Array<String>] languages
|
128
|
-
# @return [java.util.HashMap]
|
129
|
-
#
|
130
|
-
def build_priorities(input, languages)
|
131
|
-
priorities = java.util.HashMap.new
|
132
|
-
priority = short_input?(input) ? 0.0 : DEFAULT_PRIORITY
|
133
|
-
|
134
|
-
PRIORITIES.each do |lang, val|
|
135
|
-
priorities.put(lang, val)
|
136
|
-
end
|
137
|
-
|
138
|
-
languages.each do |language|
|
139
|
-
unless priorities.contains_key(language)
|
140
|
-
priorities.put(language, priority)
|
141
|
-
end
|
38
|
+
def backend_detect backend, input
|
39
|
+
return backend.detect input unless @timeout
|
40
|
+
Timeout.timeout @timeout do
|
41
|
+
backend.detect input
|
142
42
|
end
|
143
|
-
|
144
|
-
return priorities
|
145
43
|
end
|
146
44
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
#
|
151
|
-
def determine_profiles(input)
|
152
|
-
return short_input?(input) ? short_profiles_path : profiles_path
|
153
|
-
end
|
154
|
-
|
155
|
-
##
|
156
|
-
# @param [String] input
|
157
|
-
# @return [TrueClass|FalseClass]
|
158
|
-
#
|
159
|
-
def short_input?(input)
|
160
|
-
return input.length <= SHORT_THRESHOLD
|
161
|
-
end
|
162
|
-
end # Detector
|
163
|
-
end # LanguageIdentifier
|
164
|
-
end # Opener
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -7,13 +7,13 @@ Gem::Specification.new do |gem|
|
|
7
7
|
gem.summary = 'Language identifier for human readable text.'
|
8
8
|
gem.description = gem.summary
|
9
9
|
gem.homepage = "http://opener-project.github.com/"
|
10
|
-
gem.has_rdoc = 'yard'
|
11
10
|
gem.required_ruby_version = '>= 1.9.2'
|
12
11
|
|
13
12
|
gem.license = 'Apache 2.0'
|
14
13
|
|
15
14
|
gem.files = Dir.glob([
|
16
15
|
'core/target/LanguageDetection-*.jar',
|
16
|
+
'core/target/opennlp/*',
|
17
17
|
'core/target/classes/**/*',
|
18
18
|
'exec/**/*',
|
19
19
|
'lib/**/*',
|
@@ -25,12 +25,15 @@ Gem::Specification.new do |gem|
|
|
25
25
|
|
26
26
|
gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
|
27
27
|
|
28
|
-
gem.add_dependency '
|
28
|
+
gem.add_dependency 'newrelic_rpm'
|
29
|
+
|
30
|
+
gem.add_dependency 'opener-daemons', ['~> 2.5', '>= 2.5.6']
|
29
31
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
30
32
|
|
31
33
|
gem.add_dependency 'builder'
|
32
34
|
gem.add_dependency 'nokogiri'
|
33
35
|
gem.add_dependency 'slop', '~> 3.5'
|
36
|
+
gem.add_dependency 'detect_language'
|
34
37
|
|
35
38
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
36
39
|
gem.add_development_dependency 'rake'
|
metadata
CHANGED
@@ -1,267 +1,304 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-language-identifier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: '0'
|
19
|
+
name: newrelic_rpm
|
20
|
+
prerelease: false
|
21
|
+
type: :runtime
|
15
22
|
version_requirements: !ruby/object:Gem::Requirement
|
16
23
|
requirements:
|
17
|
-
- -
|
24
|
+
- - ">="
|
18
25
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
20
28
|
requirement: !ruby/object:Gem::Requirement
|
21
29
|
requirements:
|
22
|
-
- - ~>
|
30
|
+
- - "~>"
|
23
31
|
- !ruby/object:Gem::Version
|
24
|
-
version: '2.
|
32
|
+
version: '2.5'
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 2.5.6
|
36
|
+
name: opener-daemons
|
25
37
|
prerelease: false
|
26
38
|
type: :runtime
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: opener-webservice
|
29
39
|
version_requirements: !ruby/object:Gem::Requirement
|
30
40
|
requirements:
|
31
|
-
- - ~>
|
41
|
+
- - "~>"
|
32
42
|
- !ruby/object:Gem::Version
|
33
|
-
version: '2.
|
43
|
+
version: '2.5'
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 2.5.6
|
47
|
+
- !ruby/object:Gem::Dependency
|
34
48
|
requirement: !ruby/object:Gem::Requirement
|
35
49
|
requirements:
|
36
|
-
- - ~>
|
50
|
+
- - "~>"
|
37
51
|
- !ruby/object:Gem::Version
|
38
52
|
version: '2.1'
|
53
|
+
name: opener-webservice
|
39
54
|
prerelease: false
|
40
55
|
type: :runtime
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: builder
|
43
56
|
version_requirements: !ruby/object:Gem::Requirement
|
44
57
|
requirements:
|
45
|
-
- -
|
58
|
+
- - "~>"
|
46
59
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
60
|
+
version: '2.1'
|
61
|
+
- !ruby/object:Gem::Dependency
|
48
62
|
requirement: !ruby/object:Gem::Requirement
|
49
63
|
requirements:
|
50
|
-
- -
|
64
|
+
- - ">="
|
51
65
|
- !ruby/object:Gem::Version
|
52
66
|
version: '0'
|
67
|
+
name: builder
|
53
68
|
prerelease: false
|
54
69
|
type: :runtime
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: nokogiri
|
57
70
|
version_requirements: !ruby/object:Gem::Requirement
|
58
71
|
requirements:
|
59
|
-
- -
|
72
|
+
- - ">="
|
60
73
|
- !ruby/object:Gem::Version
|
61
74
|
version: '0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
62
76
|
requirement: !ruby/object:Gem::Requirement
|
63
77
|
requirements:
|
64
|
-
- -
|
78
|
+
- - ">="
|
65
79
|
- !ruby/object:Gem::Version
|
66
80
|
version: '0'
|
81
|
+
name: nokogiri
|
67
82
|
prerelease: false
|
68
83
|
type: :runtime
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
69
89
|
- !ruby/object:Gem::Dependency
|
90
|
+
requirement: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - "~>"
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '3.5'
|
70
95
|
name: slop
|
96
|
+
prerelease: false
|
97
|
+
type: :runtime
|
71
98
|
version_requirements: !ruby/object:Gem::Requirement
|
72
99
|
requirements:
|
73
|
-
- - ~>
|
100
|
+
- - "~>"
|
74
101
|
- !ruby/object:Gem::Version
|
75
102
|
version: '3.5'
|
103
|
+
- !ruby/object:Gem::Dependency
|
76
104
|
requirement: !ruby/object:Gem::Requirement
|
77
105
|
requirements:
|
78
|
-
- -
|
106
|
+
- - ">="
|
79
107
|
- !ruby/object:Gem::Version
|
80
|
-
version: '
|
108
|
+
version: '0'
|
109
|
+
name: detect_language
|
81
110
|
prerelease: false
|
82
111
|
type: :runtime
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rspec
|
85
112
|
version_requirements: !ruby/object:Gem::Requirement
|
86
113
|
requirements:
|
87
|
-
- -
|
114
|
+
- - ">="
|
88
115
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
116
|
+
version: '0'
|
117
|
+
- !ruby/object:Gem::Dependency
|
90
118
|
requirement: !ruby/object:Gem::Requirement
|
91
119
|
requirements:
|
92
|
-
- - ~>
|
120
|
+
- - "~>"
|
93
121
|
- !ruby/object:Gem::Version
|
94
122
|
version: '3.0'
|
123
|
+
name: rspec
|
95
124
|
prerelease: false
|
96
125
|
type: :development
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: rake
|
99
126
|
version_requirements: !ruby/object:Gem::Requirement
|
100
127
|
requirements:
|
101
|
-
- -
|
128
|
+
- - "~>"
|
102
129
|
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
130
|
+
version: '3.0'
|
131
|
+
- !ruby/object:Gem::Dependency
|
104
132
|
requirement: !ruby/object:Gem::Requirement
|
105
133
|
requirements:
|
106
|
-
- -
|
134
|
+
- - ">="
|
107
135
|
- !ruby/object:Gem::Version
|
108
136
|
version: '0'
|
137
|
+
name: rake
|
109
138
|
prerelease: false
|
110
139
|
type: :development
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: cliver
|
113
140
|
version_requirements: !ruby/object:Gem::Requirement
|
114
141
|
requirements:
|
115
|
-
- -
|
142
|
+
- - ">="
|
116
143
|
- !ruby/object:Gem::Version
|
117
144
|
version: '0'
|
145
|
+
- !ruby/object:Gem::Dependency
|
118
146
|
requirement: !ruby/object:Gem::Requirement
|
119
147
|
requirements:
|
120
|
-
- -
|
148
|
+
- - ">="
|
121
149
|
- !ruby/object:Gem::Version
|
122
150
|
version: '0'
|
151
|
+
name: cliver
|
123
152
|
prerelease: false
|
124
153
|
type: :development
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
125
159
|
description: Language identifier for human readable text.
|
126
160
|
email:
|
127
161
|
executables:
|
128
162
|
- language-identifier
|
129
|
-
- language-identifier-server
|
130
163
|
- language-identifier-daemon
|
164
|
+
- language-identifier-server
|
131
165
|
extensions: []
|
132
166
|
extra_rdoc_files: []
|
133
167
|
files:
|
168
|
+
- LICENSE.txt
|
169
|
+
- README.md
|
170
|
+
- bin/language-identifier
|
171
|
+
- bin/language-identifier-daemon
|
172
|
+
- bin/language-identifier-server
|
173
|
+
- config.ru
|
134
174
|
- core/target/LanguageDetection-1.0.0.jar
|
135
|
-
- core/target/
|
136
|
-
- core/target/classes/
|
137
|
-
- core/target/classes/
|
138
|
-
- core/target/classes/
|
139
|
-
- core/target/classes/
|
140
|
-
- core/target/classes/
|
141
|
-
- core/target/classes/
|
142
|
-
- core/target/classes/
|
143
|
-
- core/target/classes/
|
175
|
+
- core/target/classes/com/cybozu/labs/langdetect/Detector.class
|
176
|
+
- core/target/classes/com/cybozu/labs/langdetect/DetectorFactory.class
|
177
|
+
- core/target/classes/com/cybozu/labs/langdetect/ErrorCode.class
|
178
|
+
- core/target/classes/com/cybozu/labs/langdetect/LangDetectException.class
|
179
|
+
- core/target/classes/com/cybozu/labs/langdetect/Language.class
|
180
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/LangProfile.class
|
181
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/Messages.class
|
182
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/NGram.class
|
183
|
+
- core/target/classes/com/cybozu/labs/langdetect/util/messages.properties
|
184
|
+
- core/target/classes/profiles/af
|
185
|
+
- core/target/classes/profiles/ar
|
186
|
+
- core/target/classes/profiles/bg
|
187
|
+
- core/target/classes/profiles/bn
|
144
188
|
- core/target/classes/profiles/ca
|
189
|
+
- core/target/classes/profiles/cs
|
190
|
+
- core/target/classes/profiles/da
|
191
|
+
- core/target/classes/profiles/de
|
192
|
+
- core/target/classes/profiles/el
|
145
193
|
- core/target/classes/profiles/en
|
146
|
-
- core/target/classes/profiles/
|
147
|
-
- core/target/classes/profiles/
|
148
|
-
- core/target/classes/profiles/lt
|
149
|
-
- core/target/classes/profiles/pa
|
194
|
+
- core/target/classes/profiles/es
|
195
|
+
- core/target/classes/profiles/et
|
150
196
|
- core/target/classes/profiles/eu
|
151
|
-
- core/target/classes/profiles/ja
|
152
|
-
- core/target/classes/profiles/he
|
153
|
-
- core/target/classes/profiles/nl
|
154
|
-
- core/target/classes/profiles/sl
|
155
|
-
- core/target/classes/profiles/tr
|
156
|
-
- core/target/classes/profiles/pl
|
157
|
-
- core/target/classes/profiles/sk
|
158
197
|
- core/target/classes/profiles/fa
|
159
|
-
- core/target/classes/profiles/
|
160
|
-
- core/target/classes/profiles/bg
|
161
|
-
- core/target/classes/profiles/it
|
198
|
+
- core/target/classes/profiles/fi
|
162
199
|
- core/target/classes/profiles/fr
|
163
|
-
- core/target/classes/profiles/
|
164
|
-
- core/target/classes/profiles/
|
165
|
-
- core/target/classes/profiles/
|
166
|
-
- core/target/classes/profiles/
|
167
|
-
- core/target/classes/profiles/
|
168
|
-
- core/target/classes/profiles/
|
169
|
-
- core/target/classes/profiles/
|
170
|
-
- core/target/classes/profiles/
|
200
|
+
- core/target/classes/profiles/gu
|
201
|
+
- core/target/classes/profiles/he
|
202
|
+
- core/target/classes/profiles/hi
|
203
|
+
- core/target/classes/profiles/hr
|
204
|
+
- core/target/classes/profiles/hu
|
205
|
+
- core/target/classes/profiles/id
|
206
|
+
- core/target/classes/profiles/it
|
207
|
+
- core/target/classes/profiles/ja
|
171
208
|
- core/target/classes/profiles/kn
|
172
|
-
- core/target/classes/profiles/sw
|
173
209
|
- core/target/classes/profiles/ko
|
174
|
-
- core/target/classes/profiles/
|
210
|
+
- core/target/classes/profiles/lt
|
175
211
|
- core/target/classes/profiles/lv
|
176
|
-
- core/target/classes/profiles/sv
|
177
|
-
- core/target/classes/profiles/so
|
178
|
-
- core/target/classes/profiles/vi
|
179
|
-
- core/target/classes/profiles/et
|
180
|
-
- core/target/classes/profiles/hi
|
181
|
-
- core/target/classes/profiles/af
|
182
|
-
- core/target/classes/profiles/gu
|
183
|
-
- core/target/classes/profiles/zh-cn
|
184
212
|
- core/target/classes/profiles/mk
|
213
|
+
- core/target/classes/profiles/ml
|
214
|
+
- core/target/classes/profiles/mr
|
185
215
|
- core/target/classes/profiles/ne
|
186
|
-
- core/target/classes/profiles/
|
187
|
-
- core/target/classes/profiles/
|
188
|
-
- core/target/classes/profiles/
|
189
|
-
- core/target/classes/profiles/
|
216
|
+
- core/target/classes/profiles/nl
|
217
|
+
- core/target/classes/profiles/no
|
218
|
+
- core/target/classes/profiles/pa
|
219
|
+
- core/target/classes/profiles/pl
|
220
|
+
- core/target/classes/profiles/pt
|
221
|
+
- core/target/classes/profiles/ro
|
222
|
+
- core/target/classes/profiles/ru
|
223
|
+
- core/target/classes/profiles/sk
|
224
|
+
- core/target/classes/profiles/sl
|
225
|
+
- core/target/classes/profiles/so
|
226
|
+
- core/target/classes/profiles/sq
|
227
|
+
- core/target/classes/profiles/sv
|
228
|
+
- core/target/classes/profiles/sw
|
229
|
+
- core/target/classes/profiles/ta
|
230
|
+
- core/target/classes/profiles/te
|
231
|
+
- core/target/classes/profiles/th
|
190
232
|
- core/target/classes/profiles/tl
|
233
|
+
- core/target/classes/profiles/tr
|
234
|
+
- core/target/classes/profiles/uk
|
235
|
+
- core/target/classes/profiles/ur
|
236
|
+
- core/target/classes/profiles/vi
|
237
|
+
- core/target/classes/profiles/zh-cn
|
238
|
+
- core/target/classes/profiles/zh-tw
|
239
|
+
- core/target/classes/short_profiles/ar
|
240
|
+
- core/target/classes/short_profiles/bg
|
241
|
+
- core/target/classes/short_profiles/bn
|
242
|
+
- core/target/classes/short_profiles/ca
|
243
|
+
- core/target/classes/short_profiles/cs
|
244
|
+
- core/target/classes/short_profiles/da
|
245
|
+
- core/target/classes/short_profiles/de
|
246
|
+
- core/target/classes/short_profiles/el
|
247
|
+
- core/target/classes/short_profiles/en
|
248
|
+
- core/target/classes/short_profiles/es
|
249
|
+
- core/target/classes/short_profiles/et
|
250
|
+
- core/target/classes/short_profiles/fa
|
191
251
|
- core/target/classes/short_profiles/fi
|
192
|
-
- core/target/classes/short_profiles/
|
193
|
-
- core/target/classes/short_profiles/
|
252
|
+
- core/target/classes/short_profiles/fr
|
253
|
+
- core/target/classes/short_profiles/gu
|
254
|
+
- core/target/classes/short_profiles/he
|
255
|
+
- core/target/classes/short_profiles/hi
|
194
256
|
- core/target/classes/short_profiles/hr
|
257
|
+
- core/target/classes/short_profiles/hu
|
195
258
|
- core/target/classes/short_profiles/id
|
196
|
-
- core/target/classes/short_profiles/
|
197
|
-
- core/target/classes/short_profiles/no
|
198
|
-
- core/target/classes/short_profiles/ca
|
199
|
-
- core/target/classes/short_profiles/en
|
200
|
-
- core/target/classes/short_profiles/ru
|
201
|
-
- core/target/classes/short_profiles/te
|
202
|
-
- core/target/classes/short_profiles/lt
|
203
|
-
- core/target/classes/short_profiles/pa
|
259
|
+
- core/target/classes/short_profiles/it
|
204
260
|
- core/target/classes/short_profiles/ja
|
205
|
-
- core/target/classes/short_profiles/
|
261
|
+
- core/target/classes/short_profiles/ko
|
262
|
+
- core/target/classes/short_profiles/lt
|
263
|
+
- core/target/classes/short_profiles/lv
|
264
|
+
- core/target/classes/short_profiles/mk
|
265
|
+
- core/target/classes/short_profiles/ml
|
206
266
|
- core/target/classes/short_profiles/nl
|
207
|
-
- core/target/classes/short_profiles/
|
267
|
+
- core/target/classes/short_profiles/no
|
268
|
+
- core/target/classes/short_profiles/pa
|
208
269
|
- core/target/classes/short_profiles/pl
|
209
|
-
- core/target/classes/short_profiles/si
|
210
|
-
- core/target/classes/short_profiles/fa
|
211
|
-
- core/target/classes/short_profiles/de
|
212
|
-
- core/target/classes/short_profiles/bg
|
213
|
-
- core/target/classes/short_profiles/it
|
214
|
-
- core/target/classes/short_profiles/fr
|
215
|
-
- core/target/classes/short_profiles/el
|
216
270
|
- core/target/classes/short_profiles/pt
|
217
|
-
- core/target/classes/short_profiles/uk
|
218
|
-
- core/target/classes/short_profiles/da
|
219
|
-
- core/target/classes/short_profiles/ar
|
220
|
-
- core/target/classes/short_profiles/zh-tw
|
221
|
-
- core/target/classes/short_profiles/sq
|
222
|
-
- core/target/classes/short_profiles/th
|
223
|
-
- core/target/classes/short_profiles/ko
|
224
271
|
- core/target/classes/short_profiles/ro
|
225
|
-
- core/target/classes/short_profiles/
|
272
|
+
- core/target/classes/short_profiles/ru
|
273
|
+
- core/target/classes/short_profiles/si
|
274
|
+
- core/target/classes/short_profiles/sq
|
226
275
|
- core/target/classes/short_profiles/sv
|
276
|
+
- core/target/classes/short_profiles/ta
|
277
|
+
- core/target/classes/short_profiles/te
|
278
|
+
- core/target/classes/short_profiles/th
|
279
|
+
- core/target/classes/short_profiles/tl
|
280
|
+
- core/target/classes/short_profiles/tr
|
281
|
+
- core/target/classes/short_profiles/uk
|
282
|
+
- core/target/classes/short_profiles/ur
|
227
283
|
- core/target/classes/short_profiles/vi
|
228
|
-
- core/target/classes/short_profiles/et
|
229
|
-
- core/target/classes/short_profiles/hi
|
230
|
-
- core/target/classes/short_profiles/gu
|
231
284
|
- core/target/classes/short_profiles/zh-cn
|
232
|
-
- core/target/classes/short_profiles/
|
233
|
-
- core/target/
|
234
|
-
- core/target/
|
235
|
-
- core/target/classes/short_profiles/cs
|
236
|
-
- core/target/classes/short_profiles/bn
|
237
|
-
- core/target/classes/short_profiles/tl
|
238
|
-
- core/target/classes/com/cybozu/labs/langdetect/ErrorCode.class
|
239
|
-
- core/target/classes/com/cybozu/labs/langdetect/LangDetectException.class
|
240
|
-
- core/target/classes/com/cybozu/labs/langdetect/Language.class
|
241
|
-
- core/target/classes/com/cybozu/labs/langdetect/Detector.class
|
242
|
-
- core/target/classes/com/cybozu/labs/langdetect/af
|
243
|
-
- core/target/classes/com/cybozu/labs/langdetect/DetectorFactory.class
|
244
|
-
- core/target/classes/com/cybozu/labs/langdetect/util/NGram.class
|
245
|
-
- core/target/classes/com/cybozu/labs/langdetect/util/LangProfile.class
|
246
|
-
- core/target/classes/com/cybozu/labs/langdetect/util/Messages.class
|
247
|
-
- core/target/classes/com/cybozu/labs/langdetect/util/messages.properties
|
285
|
+
- core/target/classes/short_profiles/zh-tw
|
286
|
+
- core/target/opennlp/langdetect-183.bin
|
287
|
+
- core/target/opennlp/opennlp-tools-1.8.4.jar
|
248
288
|
- exec/language-identifier.rb
|
249
289
|
- lib/opener/language_identifier.rb
|
250
|
-
- lib/opener/language_identifier/
|
251
|
-
- lib/opener/language_identifier/
|
252
|
-
- lib/opener/language_identifier/
|
253
|
-
- lib/opener/language_identifier/detector.rb
|
290
|
+
- lib/opener/language_identifier/backend/detect_language_com.rb
|
291
|
+
- lib/opener/language_identifier/backend/language_detection.rb
|
292
|
+
- lib/opener/language_identifier/backend/opennlp.rb
|
254
293
|
- lib/opener/language_identifier/cli.rb
|
294
|
+
- lib/opener/language_identifier/detector.rb
|
295
|
+
- lib/opener/language_identifier/kaf_builder.rb
|
255
296
|
- lib/opener/language_identifier/public/markdown.css
|
297
|
+
- lib/opener/language_identifier/server.rb
|
298
|
+
- lib/opener/language_identifier/version.rb
|
256
299
|
- lib/opener/language_identifier/views/index.erb
|
257
300
|
- lib/opener/language_identifier/views/result.erb
|
258
|
-
- config.ru
|
259
301
|
- opener-language-identifier.gemspec
|
260
|
-
- README.md
|
261
|
-
- LICENSE.txt
|
262
|
-
- bin/language-identifier
|
263
|
-
- bin/language-identifier-server
|
264
|
-
- bin/language-identifier-daemon
|
265
302
|
homepage: http://opener-project.github.com/
|
266
303
|
licenses:
|
267
304
|
- Apache 2.0
|
@@ -272,19 +309,18 @@ require_paths:
|
|
272
309
|
- lib
|
273
310
|
required_ruby_version: !ruby/object:Gem::Requirement
|
274
311
|
requirements:
|
275
|
-
- -
|
312
|
+
- - ">="
|
276
313
|
- !ruby/object:Gem::Version
|
277
314
|
version: 1.9.2
|
278
315
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
279
316
|
requirements:
|
280
|
-
- -
|
317
|
+
- - ">="
|
281
318
|
- !ruby/object:Gem::Version
|
282
319
|
version: '0'
|
283
320
|
requirements: []
|
284
321
|
rubyforge_project:
|
285
|
-
rubygems_version: 2.
|
322
|
+
rubygems_version: 2.7.9
|
286
323
|
signing_key:
|
287
324
|
specification_version: 4
|
288
325
|
summary: Language identifier for human readable text.
|
289
326
|
test_files: []
|
290
|
-
has_rdoc: yard
|
Binary file
|
@@ -1 +0,0 @@
|
|
1
|
-
{"freq":{"D":9246,"E":2445,"F":2510,"G":3299,"A":6930,"B":3706,"C":2451,"L":2519,"M":3951,"N":3334,"O":2514,"H":3034,"I":2837,"J":2196,"K":3663,"U":687,"T":2336,"W":2258,"V":2714,"Q":182,"P":3097,"S":8234,"R":3039,"Y":252,"X":214,"Z":422,"f":13583,"g":42805,"d":77385,"Feb":207,"e":240974,"b":21626,"c":4896,"a":128566,"n":127153,"o":86673,"l":57433,"m":31352,"j":4048,"k":45378,"h":17527,"i":140621,"w":24930,"v":32618,"u":35166,"t":82606,"s":102389,"r":98861,"q":199,"p":23331,"z":1187,"y":11757,"x":1123,"ï":264,"ë":2903,"ê":1053,"é":765,"á":212,"ü":233,"ö":184,"ó":216,"Eur":318,"Eng":637," l":3565," m":7731," n":16000," o":12065," h":7358," i":23795," j":1325," k":6363," d":33601," e":13358," f":1200," g":11018,"р":242,"с":306," a":8747,"т":161," b":8379," c":434," u":1931," t":8537," w":13128," v":24617," p":4859," s":15482," r":3617," J":2155," K":3559," H":2961," I":2185," N":3120," O":2318," L":2396," M":3803," B":3554," C":2109," A":6365," F":2371," G":3138," D":8986," E":2271,"л":219,"к":266," Z":368," Y":241,"и":371,"о":333,"н":199," S":7708,"Ger":200," R":2881,"в":199," Q":162," P":2912,"а":481," W":2205," V":2322," U":571,"е":266," T":2130,"Fra":1006,"A ":345,"Da":804,"Co":478,"Ch":621,"Du":1025,"Do":201,"De":763,"Di":5828,"Fe":367,"Eu":354,"En":721,"El":212,"Ge":659,"Ga":319,"I ":452,"Fr":1217,"Fo":165,"Fi":216,"II ":246,"C ":278,"Au":486,"Ar":425,"At":187,"As":201,"D ":158,"Ba":648,"Af":2087,"Am":566,"An":491,"Ap":353,"Al":628,"Bu":243,"Br":778,"Ca":399,"Bi":180,"Be":880,"Bo":481,"Bl":161,"Kr":224,"Ko":657,"Le":490,"Li":504,"La":658,"Lu":245,"Lo":347,"Me":800,"Mi":548,"Ma":1360,"Mu":186,"Mo":627,"Ni":257,"Ne":763,"Na":666,"No":1092,"Ok":339,"Ol":206,"Her":157,"Gr":1326,"Go":356,"Ha":534,"He":680,"II":369,"Hi":301,"Ho":503,"Hu":294,"Hy":550,"In":919,"Is":158,"It":218,"Ja":713,"Je":157,"Jo":565,"Ju":623,"Ka":1489,"Ki":194,"Ke":447,"Un":253,"Tu":248,"Tr":236,"To":272,"Th":313,"Te":262,"Ta":276,"V ":280,"Sw":402,"Sy":292,"St":964,"Su":1701,"Wo":181,"Wi":534,"Wa":412,"We":720,"Vo":315,"Vr":251,"Vi":374,"Va":314,"Ve":689,"Pr":551,"S ":157,"Pe":310,"Pa":727,"Po":681,"Pi":230,"Os":236,"Oo":423,"Or":191,"Se":814,"Sc":197,"Si":387,"Sl":222,"Sk":201,"Sp":443,"So":680,"Ru":645,"Ry":194,"Sa":728,"Re":621,"Ri":222,"Ro":746,"SA":233,"Ra":223,"Gre":501,"Gri":383,"Gra":158,"b ":1179,"Gro":254,"a ":7054,"i ":2513,"gd":570,"ge":16432,"ga":1621,"gb":319,"fk":224,"fl":183,"fg":323,"ff":351,"fi":1111,"fh":169,"fs":1224,"fr":2334,"fu":174,"ft":300,"fo":725,"Int":180,"he":6229,"ha":2610,"gn":360,"gl":334,"gi":2135,"gh":921,"gg":418,"gu":592,"gt":1512,"gs":1974,"gr":3459,"go":1385,"dt":211,"du":998,"dw":506,"g ":10256,"ea":936,"eb":3497,"ec":406,"ed":5721,"de":18394,"dd":606,"dg":161,"di":29432,"dh":249,"dj":173,"dm":299,"do":2521,"ds":2062,"dr":1453,"ew":3034,"eu":3603,"ev":2016,"ey":309,"fa":570,"h ":864,"Ind":251,"fd":469,"fe":948,"eh":993,"eg":3187,"ef":995,"ee":12296,"el":15653,"ek":7920,"ei":5726,"ep":2393,"eo":692,"en":27638,"em":4686,"et":10282,"es":15156,"er":33393,"ca":479,"e ":78745,"by":1025,"br":1953,"bu":1057,"bo":2123,"bl":1117,"bi":1966,"bb":156,"be":8513,"db":222,"In ":319,"da":3617,"f ":4067,"ct":207,"co":446,"ck":502,"ci":340,"ch":1526,"ce":547,"c ":311,"az":190,"ay":279,"ba":2057,"d ":15502,"at":11369,"as":9342,"ar":11432,"aw":597,"av":407,"au":883,"ak":2797,"al":9554,"ai":1291,"aj":155,"ap":2087,"am":3989,"an":36357,"ac":615,"ad":4564,"aa":18307,"ab":1064,"ag":2729,"ah":292,"ae":907,"af":1901,"nu":917,"nt":6760,"ns":9243,"nr":212,"no":2885,"nn":1621,"ny":191,"nw":666,"nv":455,"oe":6026,"of":3797,"oc":387,"od":1636,"oa":178,"ob":729,"om":5480,"on":10533,"ok":2525,"ol":5346,"oi":587,"og":2271,"oh":382,"ot":3827,"os":3306,"ov":1152,"ou":2993,"op":4558,"oo":12667,"or":14221,"r ":19504,"ow":1144,"pe":3683,"pg":229,"pa":2371,"pl":1195,"lê":351,"po":1932,"ph":223,"pi":1008,"lo":3369,"lm":315,"ll":2990,"ls":2634,"lp":392,"lw":311,"lv":239,"lu":1548,"lt":993,"ly":716,"o ":2083,"md":261,"ma":3853,"mb":2182,"mg":224,"me":9151,"mi":2940,"mm":802,"mp":1223,"mo":1485,"ië":1437,"mt":249,"ms":966,"mu":1085,"p ":4720,"na":6444,"nb":510,"nc":507,"nd":12581,"ne":5737,"nf":203,"ng":9804,"nh":460,"ni":6127,"nj":300,"nk":2057,"nl":616,"nm":203,"jo":532,"ki":2683,"kh":210,"kg":239,"ke":8584,"ka":6722,"m ":5913,"kw":457,"ky":282,"ks":2318,"kt":2084,"ku":1443,"ko":3908,"kr":2375,"kk":1579,"kl":2200,"km":469,"li":9515,"lh":279,"lk":1158,"lj":705,"le":10290,"ld":1944,"lg":1526,"lf":717,"la":8341,"lb":446,"n ":58065,"hr":313,"ht":702,"hu":1684,"hi":1067,"ho":3048,"dé":160,"id":5034,"ic":1058,"ib":451,"ia":2568,"ig":5540,"if":581,"ie":47836,"hy":348,"k ":9212,"ir":2359,"is":17403,"it":9361,"iu":405,"iv":1008,"iw":219,"ik":8953,"il":3774,"im":1386,"in":25004,"io":1984,"eë":1032,"ip":899,"je":609,"ji":572,"iz":156,"l ":8172,"ja":1960,"wy":994,"z ":242,"wi":1800,"wo":4179,"vy":166,"y ":4684,"wa":9856,"we":6959,"vl":1196,"vi":4040,"vu":178,"vr":662,"vo":4078,"uw":282,"uu":992,"ve":5906,"va":16173,"x ":845,"ui":7822,"uk":678,"ul":2052,"ue":905,"ug":1045,"ur":5410,"us":5098,"ut":907,"um":1711,"un":2596,"up":170,"ty":1434,"tu":2643,"tt":1277,"tw":1177,"tv":217,"ub":1182,"ua":728,"ud":950,"uc":160,"w ":232,"to":5433,"tm":201,"tl":667,"ts":3814,"tr":4026,"tg":532,"te":20430,"tk":279,"tj":177,"ti":5658,"th":1701,"tb":213,"ta":9118,"su":1177,"sv":424,"ss":2799,"st":17122,"sy":1309,"sw":531,"sl":1811,"sk":5006,"sn":242,"sm":693,"sp":2566,"oë":412,"so":3731,"sr":312,"sd":385,"sc":448,"sf":208,"se":15556,"sh":473,"sg":396,"sj":338,"si":8436,"u ":1834,"sa":2367,"sb":577,"rr":652,"rs":6262,"rt":4139,"ru":2543,"rv":1198,"rw":1199,"ry":2450,"rp":1265,"ro":8165,"rn":1586,"rm":2087,"rl":1734,"rk":2996,"ri":11752,"rh":614,"rg":2653,"rf":378,"re":10923,"rd":7372,"rc":234,"rb":955,"ra":7710,"t ":22731,"qu":168,"s ":35284,"px":614,"Hy ":529,"py":231,"pt":765,"pu":844,"pp":1058,"pr":3258,"ps":659,"wê":320,"zi":170,"ze":169,"za":209,"yg":162,"ye":406,"yf":643,"yd":927,"yw":439,"ys":1141,"yn":1041,"yl":288,"yk":1145,"Apr":247,"Aug":272,"Afr":2048,"Ame":464,"Ber":218,"Bel":171,"Bre":163,"Bra":191,"Bri":282,"Des":273,"Daa":460,"Chr":224,"Cha":171,"ër":307,"ël":325,"êr":697,"ë ":1979,"ê ":310,"é ":228,"Dit":1028,"Die":4537,"Dui":918,"Ned":417,"Nas":187,"Nov":238,"Noo":595,"Okt":256,"Oli":158,"Oos":361,"Par":313,"Pro":177,"Pre":186,"SA ":161,"Ita":207,"Jan":348,"Joh":290,"Jul":297,"Jun":245,"Kaa":543,"Kan":220,"Kat":191,"Kar":171,"Ker":270,"Kon":276,"Lat":181,"Lit":162,"Mei":281,"Mar":370,"Maa":286,"Mon":210,"Mid":157,"Wil":165,"Wes":439,"Vry":192,"Vol":161,"êre":674,"Swe":193,"Sy ":252,"Sui":1515,"Sta":443,"Ste":208,"Sep":228,"Spa":253,"Rus":560,"Sch":162,"Rep":214,"Rom":176,"Ver":555,"Uni":236,"The":196,"Tur":159,"bin":400,"blo":205,"bli":525,"bla":215,"boe":246,"boo":276,"bor":587,"bou":330,"ban":283,"bal":289,"bai":191,"baa":372,"bas":270,"bar":272,"beh":366,"beg":372,"bee":325,"bed":285,"ber":1916,"bel":540,"bek":1148,"bew":349,"bev":630,"bes":1308,"bet":510,"bie":1052,"ce ":276,"bri":159,"bro":237,"bra":211,"bre":258,"bru":1062,"bur":584,"by ":693,"am ":1182,"ake":292,"al ":2759,"ain":204,"ak ":856,"aie":241,"agt":446,"anu":467,"ann":632,"ant":1705,"ans":3841,"ane":404,"ang":1856,"ani":742,"anj":191,"ank":961,"ap ":635,"ana":788,"anc":195,"and":5528,"amm":186,"amp":480,"ami":512,"ame":657,"amb":236,"ama":204,"alt":231,"als":160,"all":667,"alk":171,"alg":320,"ali":1276,"ald":217,"ale":2352,"alf":209,"ala":367,"an ":18298,"aks":261,"akt":740,"akl":166,"abe":229,"abi":201,"aby":216,"ae ":624,"aag":175,"aad":172,"aak":679,"aai":350,"aan":6190,"aal":1515,"aam":1083,"aas":579,"aar":5293,"aap":567,"aat":1563,"ad ":2565,"afg":266,"ai ":311,"age":184,"afd":268,"adm":206,"adi":436,"ade":539,"ag ":1304,"ads":176,"ach":166,"ada":249,"af ":494,"at ":6755,"arg":256,"are":965,"ard":1124,"ara":390,"aro":332,"arn":185,"arm":157,"arl":301,"ark":397,"ari":1177,"arv":249,"ars":463,"art":1494,"ary":171,"asi":1669,"ase":210,"aso":169,"ar ":3216,"apa":189,"app":418,"aps":269,"as ":5230,"awe":308,"awi":169,"ata":346,"ast":673,"ass":518,"ato":426,"ate":1382,"ati":871,"ats":404,"atu":409,"aty":167,"aus":156,"jaa":1087,"jar":470,"je ":175,"joe":306,"jin":161,"jie":306,"ito":170,"itt":191,"its":1623,"isk":182,"ism":266,"iss":374,"ist":1582,"ita":608,"ite":1331,"itg":386,"iti":469,"ius":176,"ium":203,"ivi":590,"ive":294,"is ":12546,"ion":1252,"eër":158,"ipa":265,"ir ":1648,"isi":1018,"ise":601,"isa":220,"ire":181,"it ":3772,"kil":644,"kie":536,"kin":914,"km ":266,"kgr":173,"kee":210,"kei":339,"kel":962,"ken":2090,"kep":166,"ker":1342,"ke ":3014,"kra":345,"kse":472,"kry":1085,"kri":662,"kou":249,"kor":369,"kop":214,"koo":391,"kon":866,"kom":903,"kol":246,"koe":157,"ks ":710,"kke":1272,"kki":178,"klu":430,"kle":511,"kla":387,"kli":749,"kat":157,"kar":183,"kas":204,"kap":818,"kan":1256,"kal":611,"kaa":1596,"ka ":1388," Ga":319," Ge":658," Fo":161," Fr":1217," Fi":213," Ha":534," He":680," Go":354," Gr":1318," Hy":549," Hu":294," Ho":502," II":202," Hi":301," Ja":710," Is":157," It":218," In":916,"han":779," Ka":1486,"hal":311," Ke":447,"haw":164," Ki":192,"har":356," Jo":563," Ju":622,"haa":238,"had":164," La":657," Le":488," Li":502," Ko":657," Kr":224," Ma":1348," Mi":547," Me":799,"he ":399," Lo":346," Lu":244," Ne":762," Na":662," Ni":257," Mo":624," Mu":186,"hel":273,"hei":994,"hee":465,"hed":169,"het":2911,"her":350,"hem":255," Ap":349," Am":563," An":491," Al":626," Af":2082," Ba":645," Au":486," At":187," As":200," Ar":422," Be":877,"hie":290," Bi":179," Bl":161," Bo":479," Br":777," Bu":243,"his":173," Ca":384," Ch":612," Co":473," Da":803," Di":5802," De":761," Do":196," Du":1024," El":212," En":720," Eu":354," Fe":367," Wo":179," Wi":530," We":720," Wa":412,"god":193,"gs ":887,"gor":522,"gro":2150,"gra":537,"gri":320,"gre":401," Os":236," Or":191," Oo":422," Po":674," Pi":229," Pe":309," Pa":725,"gst":406," No":1092," Ol":205," Ok":339,"gte":962,"gti":391," Ra":221," Ro":743," Re":620," Ri":222," Pr":547,"gus":284," Sy":292," Sw":400," Su":1700," St":953," Ta":273," Th":307," Te":261," Tr":236," To":270," Ry":194," Ru":645," Sa":724," Si":385," Sc":196," Se":811," So":678," Sp":441," Sk":201," Sl":222," Va":313," Ve":669," Vi":371," Vo":314," Vr":251," Tu":243," Un":253," ja":1102,"ial":357,"ian":256," in":12303,"iaa":736," is":11238," ka":1533," ki":531," ke":481,"id ":2425," ha":612," he":3438," gr":2075," go":365,"ia ":794," hy":292," hi":477," ho":1750," hu":727,"iet":320,"ieu":180,"iew":413," ni":722,"iel":277," ne":437,"ien":998," na":2339,"ier":2228,"ies":4471,"ied":1248,"ief":177,"iek":2103," mu":691,"ig ":1346," mo":667," om":1497," on":2106," of":1952,"ifi":218," no":1205," le":910," li":598," n ":10980," la":1290," ku":387,"ich":258,"ie ":34696," km":407," kl":879,"ica":209," kr":319," ko":1672," me":4100," mi":830,"ids":257," ma":1329," lu":186,"idi":291,"ide":993,"idd":457,"ida":156," lo":197," af":820," aa":2320," ad":269," am":322," an":759," ak":286," al":829," ar":263," at":229," as":2284," ba":599,"il ":459," bi":320," be":5430," bo":565," bl":263," by":612," bu":213," br":340,"ika":2950,"igd":381,"ige":1604,"igh":698,"igi":270,"igg":185,"igt":498,"igs":156,"ik ":2305," en":9738,"imp":231," ei":517," el":502,"ime":187," ek":223," ee":1730,"ind":1030,"ina":506," fa":191,"inn":302," fo":227,"int":638,"ins":1349,"ine":545,"ing":6095," fi":368,"ini":615,"ink":417," ge":8191," ga":169,"inw":455,"ikk":629," ch":185,"ike":1814,"ila":498," da":1923,"in ":12178,"iku":209,"iks":287," do":1111,"ilo":514,"ill":662," dr":523," de":3947,"ilj":228,"ili":684,"ild":294," di":25510,"imb":245,"eë ":693,"io ":196," du":309," wê":298,"hom":166,"hou":360,"hoo":1325,"hoe":410," wy":201,"hul":552,"hui":260,"hri":224,"ht ":578," ru":233," sa":888," se":2315," si":590," sl":329," sk":1250," sp":887," so":2211," ra":237," re":1576," ri":825," ro":614," pr":1589," s ":207," px":614,"hy ":302," ou":447,"hum":674," oo":2639," op":2809," or":325," pe":402," pa":556," pl":641," po":737," lê":242," wa":7840," we":1395," wo":2888," wi":454," va":14670," ve":4043," vo":2359," vr":575," vi":2068," vl":594," ty":439," tw":582," tu":692," ui":1746," ta":895," sw":227," sy":1183," st":4293," su":859," tr":387," to":1857," th":729," ti":190," te":2715,"ffe":165,"fer":157,"fel":155,"fha":158,"fge":290,"fam":176,"fde":429,"eta":359,"ete":1299,"eti":372,"esp":358,"eso":210,"est":2951,"ess":405,"eun":234,"eto":320,"etr":438,"ets":217,"ett":493,"eve":456,"eva":262,"evo":907,"evi":274,"eur":2292,"eus":242,"ewi":337,"ewe":1704,"ewo":449,"ey ":181,"ewa":222,"epe":254,"er ":10617,"epa":228,"eor":221,"es ":4626,"ept":277,"epu":400,"epr":184,"erk":2067,"erl":875,"eri":1765,"erg":1022,"erh":416,"ere":1861,"erf":286,"erd":1514,"era":1470,"erb":529,"et ":6083,"esk":1018,"esl":228,"esi":976,"ese":3607,"eu ":338,"erv":860,"erw":949,"err":349,"ert":1101,"ers":4583,"ern":1142,"erm":861,"erp":342,"ero":382,"ekg":155,"ekk":206,"eko":474,"eks":950,"ekt":701,"en ":13492,"ela":904,"eld":1199,"elf":322,"ele":2593,"eli":1906,"elj":427,"elg":226,"elk":209,"ell":778,"elo":234,"els":1983,"elt":333,"ely":255,"emb":839,"ema":484,"eme":1266,"emo":181,"emi":456,"ep ":699,"ene":1142,"enh":254,"eng":314,"enb":269,"ena":610,"end":3112,"eno":500,"enn":400,"enk":275,"eni":1151,"ens":2864,"ent":2318,"ego":497,"ege":690,"egi":516,"eha":370,"egr":238,"egs":217,"egt":193,"eho":266,"ehe":259,"ek ":1799,"eis":330,"eil":544,"ein":1010,"eie":633,"eid":1307,"el ":3516,"eit":680,"eke":2739,"eka":220,"em ":967,"gin":784,"gie":714,"ght":548,"gep":249,"gen":1564,"get":297,"ger":1248,"ges":2014,"gev":788,"gew":944,"gee":448,"ged":475,"geb":2499,"geh":356,"geg":181,"gem":756,"gel":1995,"gek":350,"gde":427,"ge ":1916,"gaa":266,"gan":539,"ga ":157,"fst":852,"fri":2089,"for":371,"fie":369,"fil":208,"fin":174,"fis":177,"da ":327,"de ":6409,"daa":645,"dag":700,"dae":480,"dat":659,"dan":233,"dam":165,"dde":490,"ch ":316,"cha":160,"ck ":233,"che":490,"ed ":1090,"eba":159,"ebe":354,"ebi":752,"ebo":768,"ebr":1168,"ei ":821,"ega":168,"eek":631,"een":2520,"eel":2072,"eem":410,"eed":587,"ees":884,"eer":3295,"eeu":449,"eet":195,"edi":638,"ede":2561,"eda":161,"eg ":316,"eds":321,"edr":340,"ee ":892,"ef ":280,"dwe":310,"dus":171,"dor":875,"doo":416,"don":160,"dom":227,"ds ":353,"dmi":211,"doe":283,"dst":428,"dui":309,"dri":421,"dra":423,"dry":204,"dsk":181,"dse":527,"dia":294,"der":4829,"des":476,"deu":1676,"dee":1279,"del":1695,"dek":186,"den":1206,"do ":172,"din":875,"dio":177,"dis":425,"dit":656,"die":24964,"dig":1168,"dik":198,"rhe":301,"rga":496,"rgi":335,"rge":595,"ret":312,"res":944,"rg ":777,"rea":245,"ree":1091,"ref":257,"red":294,"rei":545,"reg":1039,"ren":1300,"rek":765,"rel":674,"rep":191,"rf ":180,"rdo":215,"rdi":841,"rde":1873,"re ":2607,"rd ":3667,"ras":532,"rat":587,"rbi":190,"rba":160,"rbe":287,"rag":291,"ran":2011,"ram":317,"ral":832,"rak":247,"raa":1046,"raf":284,"rad":331,"rs ":1922,"ros":273,"rot":330,"rom":305,"ron":1072,"roo":1778,"rop":575,"rou":212,"rov":708,"rod":199,"rol":315,"roe":1277,"rog":195,"rno":196,"rp ":728,"rna":508,"rne":469,"rmo":164,"rma":539,"rme":324,"rmi":175,"rlo":320,"rli":409,"rle":270,"rla":508,"rks":184,"rko":248,"rki":199,"rkl":203,"rke":440,"rka":271,"rm ":692,"rio":174,"rit":493,"ris":571,"riv":501,"rig":863,"ril":278,"rik":3384,"rin":1384,"ria":924,"ric":236,"rie":2029,"rk ":1040,"rwe":410,"rwy":498,"ryf":393,"rui":1143,"rug":256,"rum":244,"ruk":231,"rus":225,"rva":502,"rvl":353,"rvo":192,"rwa":171,"ry ":383,"rsk":872,"rsi":432,"rso":249,"rsp":591,"rsa":225,"rse":478,"rta":186,"rst":1083,"rtk":160,"rto":274,"rte":620,"rti":334,"rua":209,"rty":351,"rt ":1413,"rre":272,"saa":540,"sal":170,"sam":303,"san":408,"sas":204,"sa ":155,"ryw":338,"rys":282,"ryk":576,"sge":305,"sie":4039,"sid":185,"sia":299,"sit":436,"sis":296,"sip":279,"sin":541,"sio":799,"sil":194,"sim":173,"sik":231,"sif":160,"sig":289,"sbu":231,"se ":9840,"sch":268,"ser":501,"ses":400,"set":250,"seu":239,"sea":162,"see":618,"sed":264,"sen":1323,"sem":298,"sel":1093,"sek":186,"spo":405,"spr":756,"spe":934,"spa":260,"sow":508,"som":247,"son":545,"soo":954,"soe":195,"sok":377,"st ":267,"sli":202,"slu":297,"sky":183,"sla":1006,"sle":205,"ski":804,"sko":594,"skr":1152,"sku":244,"ska":1212,"ske":665,"sië":283,"sma":173,"sme":382,"sse":1275,"ssa":198,"ssi":922,"ste":6829,"sta":5065,"sto":805,"sti":1396,"stu":693,"str":1673,"sty":226,"sui":596,"sve":167,"sy ":1199,"swa":313,"tal":1301,"taa":2499,"tad":2323,"tau":165,"tat":456,"tas":164,"tan":1021,"te ":8469,"ta ":339,"pa ":202,"pe ":459,"par":608,"pas":176,"paa":333,"pal":324,"pan":428,"pge":207,"pen":295,"per":1379,"pes":438,"pee":201,"pel":568,"pla":660,"lê ":268,"pli":169,"ple":241,"pie":480,"por":394,"poo":160,"pos":197,"pol":518,"ppy":184,"ppe":636,"pst":229,"pub":435,"pte":575,"pra":251,"pri":484,"pre":726,"pro":1677,"pun":246,"px ":614,"py ":166,"ra ":424,"ngo":161,"ngr":289,"ngs":1292,"nge":2327,"nhe":276,"nel":314,"nen":189,"nem":225,"ner":1014,"net":468,"nes":533,"ng ":4906,"nee":762,"nce":206,"ne ":1530,"ndr":216,"nds":657,"ndo":326,"ndi":878,"nde":5081,"nda":453,"nal":790,"nam":291,"nad":316,"naf":372,"nab":229,"naa":1198,"nd ":4245,"nat":282,"nas":677,"na ":1572,"nwo":542,"nus":209,"nua":266,"ntw":393,"nto":201,"nts":300,"ntr":543,"nti":571,"ntl":164,"nta":457,"nte":1815,"nst":787,"nse":3345,"nsi":1079,"nsl":207,"nsk":498,"nt ":1757,"ns ":2476,"nog":456,"noe":477,"noo":659,"nom":368,"nne":904,"nni":442,"nië":246,"nli":373,"nke":345,"nkl":391,"nks":179,"nkr":453,"nje":156,"nig":640,"nie":1831,"nk ":274,"niv":210,"nis":1512,"nin":804,"ogr":272,"ogi":423,"oi ":216,"oha":228,"oeë":178,"ok ":1432,"ol ":554,"ock":164,"oe ":303,"ode":551,"odi":176,"ods":177,"of ":2323,"oek":499,"oel":276,"oem":563,"oeg":231,"oei":336,"oer":752,"oes":295,"oet":302,"oen":602,"oep":714,"odu":188,"oed":477,"og ":895,"ofs":803,"oew":261,"od ":254,"obe":382,"oud":510,"oue":197,"ote":350,"ott":175,"ots":913,"oto":266,"ost":637,"osi":266,"ose":346,"oss":176,"oso":190,"owa":484,"owe":208,"ovi":678,"ove":370,"ous":302,"our":167,"out":306,"opo":205,"opp":449,"ope":438,"opg":213,"opa":195,"os ":1171,"oon":731,"ool":561,"oom":393,"ook":1376,"ooi":288,"oof":1146,"oog":389,"ood":288,"or ":1152,"oot":1351,"oos":958,"oor":4776,"oop":341,"ork":260,"orl":386,"orm":964,"orp":858,"ord":4583,"ore":773,"org":587,"ori":1212,"ou ":999,"ort":1219,"ors":871,"orw":195,"ot ":1528,"orb":186,"ora":235,"ola":171,"on ":1522,"oli":772,"oll":288,"olk":702,"ole":263,"olg":904,"ols":270,"olo":636,"om ":1870,"okk":553,"ona":980,"ond":1915,"one":1178,"ong":620,"oni":1012,"onl":220,"onk":232,"onn":184,"ono":391,"ons":511,"ont":1339,"oma":425,"ome":845,"omi":324,"omm":454,"omp":297,"oms":595,"op ":2264,"la ":334,"le ":3834,"lf ":175,"lde":601,"laa":982,"lad":180,"lag":434,"lak":490,"lan":4154,"lar":155,"lat":361,"las":433,"ld ":695,"kus":410,"kun":548,"kul":242,"kwe":204,"kwa":191,"kte":822,"kst":257,"ksi":463,"ktr":342,"ktu":210,"kti":247,"kto":369,"ls ":1008,"lon":293,"lom":430,"loo":382,"loe":423,"log":655,"los":274,"lië":349,"lti":157,"lub":411,"lug":221,"lst":643,"lte":252,"lse":623,"lge":754,"lew":250,"leu":193,"les":329,"let":347,"ler":415,"lem":358,"len":1056,"lek":605,"lei":1010,"leg":257,"lee":477,"led":218,"lg ":483,"lo ":169,"lla":325,"lle":1578,"lli":615,"lke":200,"lki":447,"ljo":223,"ll ":176,"lja":430,"lit":831,"lis":504,"leë":449,"lin":1208,"lim":201,"lid":165,"lia":364,"lik":2917,"lig":818,"lie":1618,"ma ":226,"mb ":655,"maa":1244,"mag":221,"mar":331,"mas":207,"mal":270,"man":726,"mat":394,"mba":172,"mbi":179,"mbe":814,"mbo":161,"me ":936,"mde":163,"med":223,"mee":1533,"met":2981,"mes":247,"mer":991,"mel":330,"men":1550,"lui":390,"lus":194,"lwe":213,"lyk":221,"lyn":187,"mpi":220,"mpe":208,"mpo":176,"mpt":267,"ms ":488,"moe":196,"mod":233,"mon":329,"mst":248,"mus":488,"mun":417,"ër ":180,"mge":191,"min":806,"mil":465,"mit":231,"mig":184,"mie":523,"mid":310,"ië ":1136,"mme":353,"wêr":319,"yst":183,"ys ":680,"ywe":370,"ye ":306,"yf ":380,"yde":281,"yds":165,"yd ":230,"yn ":461,"yns":175,"yk ":810,"wys":531,"wor":2620,"woo":760,"won":526,"we ":1260,"wes":799,"wer":1583,"wet":305,"wen":427,"wel":545,"weg":270,"wee":1257,"wis":166,"wit":342,"wie":194,"win":417,"wil":177,"wik":231,"wan":300,"wat":5174,"war":532,"was":2236,"waa":1031,"vry":194,"vro":313,"vir":1570,"vin":921,"vie":880,"vis":289,"vla":709,"vlo":280,"voe":444,"vol":1592,"voo":1083,"vor":625,"ver":4566,"ven":170,"vem":236,"vel":250,"vee":302,"val":319,"van":14723,"vat":155,"vaa":414,"uwe":229,"uur":863,"usl":180,"usi":606,"use":380,"ust":585,"uss":1129,"ute":176,"uto":171,"us ":1998,"ure":395,"urg":669,"uri":191,"urk":167,"uro":352,"urs":211,"urt":189,"ur ":2547,"umb":689,"ume":172,"unt":325,"uns":289,"uni":820,"und":530,"um ":614,"ult":270,"ull":459,"uli":358,"un ":219,"uid":2285,"uik":850,"uim":162,"uis":508,"uk ":200,"uit":3378,"ul ":272,"ugb":161,"ugu":278,"ude":184,"udi":240,"ue ":322,"ug ":159,"ub ":406,"uar":522,"ubl":464,"ud ":181,"tyn":228,"ty ":384,"tur":232,"tus":988,"tuu":617,"tui":232,"tud":171,"tyd":628,"twi":269,"twe":751,"ts ":533,"tre":1022,"tra":1128,"tri":607,"tru":366,"tro":780,"tse":746,"tsk":298,"tsl":425,"tst":993,"tte":641,"tti":226,"to ":272,"tof":244,"toe":713,"tob":268,"tot":1108,"tom":182,"ton":586,"tol":317,"tor":808,"too":280,"til":187,"tik":334,"tie":1846,"tig":1053,"tis":241,"tin":826,"tio":267,"thu":695,"tkl":165,"tli":191,"tla":301,"tem":732,"ten":1059,"tei":844,"tek":528,"tel":2135,"tee":779,"teg":166,"ted":237,"th ":270,"teu":212,"tes":357,"ter":4231,"tge":442,"the":380},"n_words":[1541130,1808182,1328687],"name":"af"}
|