cld3 3.2.4 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +1 -1
  3. data/LICENSE +2 -2
  4. data/README.md +3 -3
  5. data/cld3.gemspec +6 -6
  6. data/ext/cld3/Makefile +266 -0
  7. data/ext/cld3/base.o +0 -0
  8. data/ext/cld3/embedding_feature_extractor.o +0 -0
  9. data/ext/cld3/embedding_network.o +0 -0
  10. data/ext/cld3/extconf.rb +3 -2
  11. data/ext/cld3/feature_extractor.o +0 -0
  12. data/ext/cld3/feature_extractor.pb.o +0 -0
  13. data/ext/cld3/feature_types.o +0 -0
  14. data/ext/cld3/fixunicodevalue.o +0 -0
  15. data/ext/cld3/fml_parser.o +0 -0
  16. data/ext/cld3/generated_entities.o +0 -0
  17. data/ext/cld3/generated_ulscript.o +0 -0
  18. data/ext/cld3/getonescriptspan.h +1 -1
  19. data/ext/cld3/getonescriptspan.o +0 -0
  20. data/ext/cld3/lang_id_nn_params.o +0 -0
  21. data/ext/cld3/language_identifier_features.o +0 -0
  22. data/ext/cld3/libcld3.def +8 -0
  23. data/ext/cld3/libcld3.so +0 -0
  24. data/ext/cld3/mkmf.log +37 -0
  25. data/ext/cld3/nnet_language_identifier.cc +8 -0
  26. data/ext/cld3/nnet_language_identifier.h +16 -0
  27. data/ext/cld3/nnet_language_identifier.o +0 -0
  28. data/ext/cld3/nnet_language_identifier_c.cc +71 -23
  29. data/ext/cld3/nnet_language_identifier_c.o +0 -0
  30. data/ext/cld3/offsetmap.o +0 -0
  31. data/ext/cld3/registry.o +0 -0
  32. data/ext/cld3/relevant_script_feature.o +0 -0
  33. data/ext/cld3/sentence.pb.o +0 -0
  34. data/ext/cld3/sentence_features.o +0 -0
  35. data/ext/cld3/task_context.o +0 -0
  36. data/ext/cld3/task_context_params.o +0 -0
  37. data/ext/cld3/task_spec.pb.o +0 -0
  38. data/ext/cld3/text_processing.o +0 -0
  39. data/ext/cld3/unicodetext.o +0 -0
  40. data/ext/cld3/utf8statetable.o +0 -0
  41. data/ext/cld3/utils.o +0 -0
  42. data/ext/cld3/workspace.o +0 -0
  43. data/lib/cld3.rb +96 -16
  44. metadata +45 -13
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91da77d7c9125f43c76c2645b04701b4c72a955859cd5cef23b9e846119e1112
4
- data.tar.gz: 2090d645b5ae57212b496c41565031624e0987992b1cb616bdbfa5991244f55a
3
+ metadata.gz: f5b3cc203abda97cb85d5dee0983b7f63c626397b8af8b90e2110bb5fedbbdec
4
+ data.tar.gz: 197f66798925404ded7af722d0194a705018d6953b11f4576c4e180ea093675d
5
5
  SHA512:
6
- metadata.gz: e9dd9783d699ee4653b1f808405b655425d1286dd6be663a965e4e448874dfd811eab73dd82f2a0c5e0ef91de8d4df62ffb2228bddb1642a57f9e1d01172115c
7
- data.tar.gz: 2d4e703a21feba6b6ef95d5afbeb2f1748e5135dd120ba6a7976383b650659db4501045b2ffded8ff8575312a2a3f6cf092552d5833c5f435a122ae43e7c4a4e
6
+ metadata.gz: 855e8ee464a2842906bfef211e2afb21820fe9a7449b58d91b9ab1908c997966b9dd4c2d5d51f82ceb84b65b5a118736a5aa4eff6ea9548b9a9abc61b297a9d0
7
+ data.tar.gz: e38ddfd81489aeb83bccc7b509dd17ea79c56ba641de37cac2d800d3428ed31e5ac57066016bd118e9e71c30c78d31b4c38a266abe012065495558adf07e68f5
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
1
+ # Copyright 2017 Akihiko Odaki <akihiko.odaki@gmail.com>
2
2
  # All Rights Reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
1
+ Copyright 2017 Akihiko Odaki <akihiko.odaki@gmail.com>
2
2
  All rights reserved.
3
3
 
4
4
  Apache License
@@ -189,7 +189,7 @@ All rights reserved.
189
189
  same "printed page" as the copyright notice for easier
190
190
  identification within third-party archives.
191
191
 
192
- Copyright 2017, Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
192
+ Copyright 2017, Akihiko Odaki <akihiko.odaki@gmail.com>
193
193
 
194
194
  Licensed under the Apache License, Version 2.0 (the "License");
195
195
  you may not use this file except in compliance with the License.
data/README.md CHANGED
@@ -8,11 +8,11 @@ require 'cld3'
8
8
 
9
9
  cld3 = CLD3::NNetLanguageIdentifier.new(0, 1000)
10
10
 
11
- cld3.find_language("こんにちは") # => #<struct Struct::Result language=:ja, probability=1.0, reliable?=true, proportion=1.0>
11
+ cld3.find_language("こんにちは") # => #<struct Struct::Result language=:ja, probability=1.0, reliable?=true, proportion=1.0, byte_ranges=[]>
12
12
 
13
- cld3.find_language("This is a pen.") # => #<struct Struct::Result language=:en, probability=0.9999408721923828, reliable?=true, proportion=1.0>
13
+ cld3.find_language("This is a pen.") # => #<struct Struct::Result language=:en, probability=0.9999408721923828, reliable?=true, proportion=1.0, byte_ranges=[]>
14
14
 
15
- cld3.find_language("здравствуйте") # => #<struct Struct::Result language=:ru, probability=0.3140212297439575, reliable?=false, proportion=1.0>
15
+ cld3.find_language("здравствуйте") # => #<struct Struct::Result language=:ru, probability=0.3140212297439575, reliable?=false, proportion=1.0, byte_ranges=[]>
16
16
  ```
17
17
 
18
18
  ## Installation
data/cld3.gemspec CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
1
+ # Copyright 2017 Akihiko Odaki <akihiko.odaki@gmail.com>
2
2
  # All Rights Reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,16 +16,16 @@
16
16
 
17
17
  Gem::Specification.new do |gem|
18
18
  gem.name = "cld3"
19
- gem.version = "3.2.4"
19
+ gem.version = "3.4.2"
20
20
  gem.summary = "Compact Language Detector v3 (CLD3)"
21
21
  gem.description = "Compact Language Detector v3 (CLD3) is a neural network model for language identification."
22
22
  gem.license = "Apache-2.0"
23
23
  gem.homepage = "https://github.com/akihikodaki/cld3-ruby"
24
24
  gem.author = "Akihiko Odaki"
25
- gem.email = "akihiko.odaki.4i@stu.hosei.ac.jp"
26
- gem.required_ruby_version = [ ">= 2.3.0", "< 2.7.0" ]
27
- gem.add_dependency "ffi", [ ">= 1.1.0", "< 1.11.0" ]
28
- gem.add_development_dependency "rspec", [ ">=3.0.0", "< 3.9.0" ]
25
+ gem.email = "akihiko.odaki@gmail.com"
26
+ gem.required_ruby_version = [ ">= 2.6.0", "< 3.1.0" ]
27
+ gem.add_dependency "ffi", [ ">= 1.1.0", "< 1.16.0" ]
28
+ gem.add_development_dependency "rspec", [ ">=3.0.0", "< 3.11.0" ]
29
29
  gem.files = Dir[
30
30
  "Gemfile", "LICENSE", "LICENSE_CLD3", "README.md",
31
31
  "cld3.gemspec", "ext/**/*", "lib/**/*"
data/ext/cld3/Makefile ADDED
@@ -0,0 +1,266 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ # V=0 quiet, V=1 verbose. other values don't work.
5
+ V = 1
6
+ Q1 = $(V:1=)
7
+ Q = $(Q1:0=@)
8
+ ECHO1 = $(V:1=@ :)
9
+ ECHO = $(ECHO1:0=@ echo)
10
+ NULLCMD = :
11
+
12
+ #### Start of system configuration section. ####
13
+
14
+ srcdir = .
15
+ topdir = /usr/include
16
+ hdrdir = $(topdir)
17
+ arch_hdrdir = /usr/include
18
+ PATH_SEPARATOR = :
19
+ VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
20
+ prefix = $(DESTDIR)/usr
21
+ rubysitearchprefix = $(sitearchlibdir)/$(RUBY_BASE_NAME)
22
+ rubyarchprefix = $(DESTDIR)/usr/lib64/ruby
23
+ rubylibprefix = $(exec_prefix)/share/ruby
24
+ exec_prefix = $(DESTDIR)/usr
25
+ vendorarchhdrdir = $(vendorhdrdir)/$(arch)
26
+ sitearchhdrdir = $(sitehdrdir)/$(arch)
27
+ rubyarchhdrdir = $(DESTDIR)/usr/include
28
+ vendorhdrdir = $(rubyhdrdir)/vendor_ruby
29
+ sitehdrdir = $(rubyhdrdir)/site_ruby
30
+ rubyhdrdir = $(DESTDIR)/usr/include
31
+ rubygemsdir = $(DESTDIR)/usr/share/rubygems
32
+ vendorarchdir = $(DESTDIR)/usr/lib64/ruby/vendor_ruby
33
+ vendorlibdir = $(vendordir)
34
+ vendordir = $(DESTDIR)/usr/share/ruby/vendor_ruby
35
+ sitearchdir = $(DESTDIR)/usr/local/lib64/ruby/site_ruby
36
+ sitelibdir = $(sitedir)
37
+ sitedir = $(DESTDIR)/usr/local/share/ruby/site_ruby
38
+ rubyarchdir = $(rubyarchprefix)
39
+ rubylibdir = $(rubylibprefix)
40
+ sitearchincludedir = $(includedir)/$(sitearch)
41
+ archincludedir = $(includedir)/$(arch)
42
+ sitearchlibdir = $(libdir)/$(sitearch)
43
+ archlibdir = $(DESTDIR)/usr/lib64
44
+ ridir = $(datarootdir)/$(RI_BASE_NAME)
45
+ mandir = $(DESTDIR)/usr/share/man
46
+ localedir = $(datarootdir)/locale
47
+ libdir = $(exec_prefix)/lib64
48
+ psdir = $(docdir)
49
+ pdfdir = $(docdir)
50
+ dvidir = $(docdir)
51
+ htmldir = $(docdir)
52
+ infodir = $(DESTDIR)/usr/share/info
53
+ docdir = $(datarootdir)/doc/$(PACKAGE)
54
+ oldincludedir = $(DESTDIR)/usr/include
55
+ includedir = $(DESTDIR)/usr/include
56
+ localstatedir = $(DESTDIR)/var
57
+ sharedstatedir = $(DESTDIR)/var/lib
58
+ sysconfdir = $(DESTDIR)/etc
59
+ datadir = $(DESTDIR)/usr/share
60
+ datarootdir = $(prefix)/share
61
+ libexecdir = $(DESTDIR)/usr/libexec
62
+ sbindir = $(DESTDIR)/usr/sbin
63
+ bindir = $(exec_prefix)/bin
64
+ archdir = $(rubyarchdir)
65
+
66
+
67
+ CC_WRAPPER =
68
+ CC = gcc
69
+ CXX = g++
70
+ LIBRUBY = $(LIBRUBY_SO)
71
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
72
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
73
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static $(MAINLIBS)
74
+ empty =
75
+ OUTFLAG = -o $(empty)
76
+ COUTFLAG = -o $(empty)
77
+ CSRCFLAG = $(empty)
78
+
79
+ RUBY_EXTCONF_H =
80
+ cflags = $(optflags) $(debugflags) $(warnflags)
81
+ cxxflags =
82
+ optflags = -O3
83
+ debugflags = -ggdb3
84
+ warnflags = -Wall -Wextra -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wwrite-strings -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable
85
+ cppflags =
86
+ CCDLFLAGS = -fPIC
87
+ CFLAGS = $(CCDLFLAGS) -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fPIC $(ARCH_FLAG)
88
+ INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
89
+ DEFS =
90
+ CPPFLAGS = $(DEFS) $(cppflags)
91
+ CXXFLAGS = $(CCDLFLAGS) -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fvisibility=hidden -std=c++11 $(ARCH_FLAG)
92
+ ldflags = -L. -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -fstack-protector-strong -rdynamic -Wl,-export-dynamic
93
+ dldflags = -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld
94
+ ARCH_FLAG =
95
+ DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
96
+ LDSHARED = $(CC) -shared
97
+ LDSHAREDXX = $(CXX) -shared
98
+ AR = ar
99
+ EXEEXT =
100
+
101
+ RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
102
+ RUBY_SO_NAME = ruby
103
+ RUBYW_INSTALL_NAME =
104
+ RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version_dir_name)
105
+ RUBYW_BASE_NAME = rubyw
106
+ RUBY_BASE_NAME = ruby
107
+
108
+ arch = aarch64-linux
109
+ sitearch = $(arch)
110
+ ruby_version = 2.7.0
111
+ ruby = $(bindir)/$(RUBY_BASE_NAME)
112
+ RUBY = $(ruby)
113
+ ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
114
+
115
+ RM = rm -f
116
+ RM_RF = $(RUBY) -run -e rm -- -rf
117
+ RMDIRS = rmdir --ignore-fail-on-non-empty -p
118
+ MAKEDIRS = /usr/bin/mkdir -p
119
+ INSTALL = /usr/bin/install -c
120
+ INSTALL_PROG = $(INSTALL) -m 0755
121
+ INSTALL_DATA = $(INSTALL) -m 644
122
+ COPY = cp
123
+ TOUCH = exit >
124
+
125
+ #### End of system configuration section. ####
126
+
127
+ preload =
128
+ libpath = . $(archlibdir)
129
+ LIBPATH = -L. -L$(archlibdir)
130
+ DEFFILE =
131
+
132
+ CLEANFILES = mkmf.log
133
+ DISTCLEANFILES =
134
+ DISTCLEANDIRS =
135
+
136
+ extout =
137
+ extout_prefix =
138
+ target_prefix =
139
+ LOCAL_LIBS =
140
+ LIBS = -lprotobuf -lpthread -lm -lc
141
+ ORIG_SRCS = base.cc embedding_feature_extractor.cc embedding_network.cc feature_extractor.cc feature_extractor.pb.cc feature_types.cc fixunicodevalue.cc fml_parser.cc generated_entities.cc generated_ulscript.cc getonescriptspan.cc lang_id_nn_params.cc language_identifier_features.cc nnet_language_identifier.cc nnet_language_identifier_c.cc offsetmap.cc registry.cc relevant_script_feature.cc sentence.pb.cc sentence_features.cc task_context.cc task_context_params.cc task_spec.pb.cc text_processing.cc unicodetext.cc utf8statetable.cc utils.cc workspace.cc
142
+ SRCS = $(ORIG_SRCS)
143
+ OBJS = base.o embedding_feature_extractor.o embedding_network.o feature_extractor.o feature_extractor.pb.o feature_types.o fixunicodevalue.o fml_parser.o generated_entities.o generated_ulscript.o getonescriptspan.o lang_id_nn_params.o language_identifier_features.o nnet_language_identifier.o nnet_language_identifier_c.o offsetmap.o registry.o relevant_script_feature.o sentence.pb.o sentence_features.o task_context.o task_context_params.o task_spec.pb.o text_processing.o unicodetext.o utf8statetable.o utils.o workspace.o
144
+ HDRS = $(srcdir)/base.h $(srcdir)/casts.h $(srcdir)/embedding_feature_extractor.h $(srcdir)/embedding_network.h $(srcdir)/embedding_network_params.h $(srcdir)/feature_extractor.h $(srcdir)/feature_types.h $(srcdir)/float16.h $(srcdir)/fml_parser.h $(srcdir)/language_identifier_features.h $(srcdir)/lang_id_nn_params.h $(srcdir)/nnet_language_identifier.h $(srcdir)/registry.h $(srcdir)/relevant_script_feature.h $(srcdir)/script_detector.h $(srcdir)/sentence_features.h $(srcdir)/simple_adder.h $(srcdir)/fixunicodevalue.h $(srcdir)/generated_ulscript.h $(srcdir)/getonescriptspan.h $(srcdir)/integral_types.h $(srcdir)/offsetmap.h $(srcdir)/port.h $(srcdir)/stringpiece.h $(srcdir)/text_processing.h $(srcdir)/utf8acceptinterchange.h $(srcdir)/utf8prop_lettermarkscriptnum.h $(srcdir)/utf8repl_lettermarklower.h $(srcdir)/utf8scannot_lettermarkspecial.h $(srcdir)/utf8statetable.h $(srcdir)/task_context.h $(srcdir)/task_context_params.h $(srcdir)/unicodetext.h $(srcdir)/utils.h $(srcdir)/workspace.h $(srcdir)/feature_extractor.pb.h $(srcdir)/sentence.pb.h $(srcdir)/task_spec.pb.h
145
+ LOCAL_HDRS =
146
+ TARGET = libcld3
147
+ TARGET_NAME = libcld3
148
+ TARGET_ENTRY = Init_$(TARGET_NAME)
149
+ DLLIB = $(TARGET).so
150
+ EXTSTATIC =
151
+ STATIC_LIB =
152
+
153
+ TIMESTAMP_DIR = .
154
+ BINDIR = $(bindir)
155
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
156
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
157
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
158
+ HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
159
+ ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
160
+ TARGET_SO_DIR =
161
+ TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
162
+ CLEANLIBS = $(TARGET_SO)
163
+ CLEANOBJS = *.o *.bak
164
+
165
+ all: $(DLLIB)
166
+ static: $(STATIC_LIB)
167
+ .PHONY: all install static install-so install-rb
168
+ .PHONY: clean clean-so clean-static clean-rb
169
+
170
+ clean-static::
171
+ clean-rb-default::
172
+ clean-rb::
173
+ clean-so::
174
+ clean: clean-so clean-static clean-rb-default clean-rb
175
+ -$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
176
+
177
+ distclean-rb-default::
178
+ distclean-rb::
179
+ distclean-so::
180
+ distclean-static::
181
+ distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
182
+ -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
183
+ -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
184
+ -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
185
+
186
+ realclean: distclean
187
+ install: install-so install-rb
188
+
189
+ install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
190
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
191
+ clean-static::
192
+ -$(Q)$(RM) $(STATIC_LIB)
193
+ install-rb: pre-install-rb do-install-rb install-rb-default
194
+ install-rb-default: pre-install-rb-default do-install-rb-default
195
+ pre-install-rb: Makefile
196
+ pre-install-rb-default: Makefile
197
+ do-install-rb:
198
+ do-install-rb-default:
199
+ pre-install-rb-default:
200
+ @$(NULLCMD)
201
+ $(TIMESTAMP_DIR)/.sitearchdir.time:
202
+ $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
203
+ $(Q) $(TOUCH) $@
204
+
205
+ site-install: site-install-so site-install-rb
206
+ site-install-so: install-so
207
+ site-install-rb: install-rb
208
+
209
+ .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
210
+
211
+ .cc.o:
212
+ $(ECHO) compiling $(<)
213
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
214
+
215
+ .cc.S:
216
+ $(ECHO) translating $(<)
217
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
218
+
219
+ .mm.o:
220
+ $(ECHO) compiling $(<)
221
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
222
+
223
+ .mm.S:
224
+ $(ECHO) translating $(<)
225
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
226
+
227
+ .cxx.o:
228
+ $(ECHO) compiling $(<)
229
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
230
+
231
+ .cxx.S:
232
+ $(ECHO) translating $(<)
233
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
234
+
235
+ .cpp.o:
236
+ $(ECHO) compiling $(<)
237
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
238
+
239
+ .cpp.S:
240
+ $(ECHO) translating $(<)
241
+ $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
242
+
243
+ .c.o:
244
+ $(ECHO) compiling $(<)
245
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
246
+
247
+ .c.S:
248
+ $(ECHO) translating $(<)
249
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
250
+
251
+ .m.o:
252
+ $(ECHO) compiling $(<)
253
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
254
+
255
+ .m.S:
256
+ $(ECHO) translating $(<)
257
+ $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
258
+
259
+ $(TARGET_SO): $(OBJS) Makefile
260
+ $(ECHO) linking shared-object $(DLLIB)
261
+ -$(Q)$(RM) $(@)
262
+ $(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
263
+
264
+
265
+
266
+ $(OBJS): $(HDRS) $(ruby_headers)
data/ext/cld3/base.o ADDED
Binary file
Binary file
data/ext/cld3/extconf.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
1
+ # Copyright 2017 Akihiko Odaki <akihiko.odaki@gmail.com>
2
2
  # All Rights Reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -33,7 +33,7 @@ FileUtils.mkdir_p("cld_3/protos")
33
33
  FileUtils.mkdir_p("script_span")
34
34
 
35
35
  [ "feature_extractor", "sentence", "task_spec" ].each {|name|
36
- `protoc '#{name}.proto' --cpp_out=.`
36
+ system "protoc", "#{name}.proto", "--cpp_out=.", exception: true
37
37
  ln_fallback("#{name}.pb.h", "cld_3/protos/#{name}.pb.h")
38
38
  }
39
39
 
@@ -56,4 +56,5 @@ FileUtils.mkdir_p("script_span")
56
56
  }
57
57
 
58
58
  $CXXFLAGS += " -fvisibility=hidden -std=c++11"
59
+ $LIBRUBYARG = ""
59
60
  create_makefile("libcld3")
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -93,7 +93,7 @@ class ScriptScanner {
93
93
  // again with the first byte of the following range.
94
94
  int MapBack(int text_offset);
95
95
 
96
- const char* GetBufferStart() {return start_byte_;};
96
+ const char* GetBufferStart() {return start_byte_;}
97
97
 
98
98
  private:
99
99
  // Skip over tags and non-letters
Binary file
Binary file
@@ -0,0 +1,8 @@
1
+ EXPORTS
2
+ NNetLanguageIdentifier_find_language
3
+ NNetLanguageIdentifier_find_top_n_most_freq_langs
4
+ delete_NNetLanguageIdentifier
5
+ delete_result
6
+ delete_results
7
+ new_NNetLanguageIdentifier
8
+ refer_to_nth_result
Binary file
data/ext/cld3/mkmf.log ADDED
@@ -0,0 +1,37 @@
1
+ "pkg-config --exists protobuf"
2
+ | pkg-config --libs protobuf
3
+ => "-lprotobuf -lpthread \n"
4
+ "gcc -o conftest -I/usr/include -I/usr/include/ruby/backward -I/usr/include -I. -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fPIC conftest.c -L. -L/usr/lib64 -L. -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -fstack-protector-strong -rdynamic -Wl,-export-dynamic -lruby -lm -lc"
5
+ checked program was:
6
+ /* begin */
7
+ 1: #include "ruby.h"
8
+ 2:
9
+ 3: int main(int argc, char **argv)
10
+ 4: {
11
+ 5: return !!argv[argc];
12
+ 6: }
13
+ /* end */
14
+
15
+ "gcc -o conftest -I/usr/include -I/usr/include/ruby/backward -I/usr/include -I. -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fPIC conftest.c -L. -L/usr/lib64 -L. -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -fstack-protector-strong -rdynamic -Wl,-export-dynamic -lruby -lprotobuf -lpthread -lm -lc"
16
+ checked program was:
17
+ /* begin */
18
+ 1: #include "ruby.h"
19
+ 2:
20
+ 3: int main(int argc, char **argv)
21
+ 4: {
22
+ 5: return !!argv[argc];
23
+ 6: }
24
+ /* end */
25
+
26
+ | pkg-config --cflags-only-I protobuf
27
+ => "\n"
28
+ | pkg-config --cflags-only-other protobuf
29
+ => "\n"
30
+ | pkg-config --libs-only-l protobuf
31
+ => "-lprotobuf -lpthread \n"
32
+ package configuration for protobuf
33
+ incflags:
34
+ cflags:
35
+ ldflags:
36
+ libs: -lprotobuf -lpthread
37
+
@@ -47,6 +47,9 @@ struct LangChunksStats {
47
47
 
48
48
  // Number of chunks corresponding to the language.
49
49
  int num_chunks = 0;
50
+
51
+ // Specifies the byte ranges that language applies to.
52
+ std::vector<NNetLanguageIdentifier::SpanInfo> byte_ranges;
50
53
  };
51
54
 
52
55
  // Compares two pairs based on their values.
@@ -298,12 +301,16 @@ NNetLanguageIdentifier::FindTopNMostFreqLangs(const string &text,
298
301
  total_num_bytes += num_original_span_bytes;
299
302
 
300
303
  const string selected_text = SelectTextGivenScriptSpan(script_span);
304
+
301
305
  result = FindLanguageOfValidUTF8(selected_text);
302
306
  language = result.language;
303
307
  lang_stats[language].byte_sum += num_original_span_bytes;
304
308
  lang_stats[language].prob_sum +=
305
309
  result.probability * num_original_span_bytes;
306
310
  lang_stats[language].num_chunks++;
311
+ // Add SpanInfo. Start and end indices are relative to original input.
312
+ lang_stats[language].byte_ranges.push_back(SpanInfo(
313
+ ss.MapBack(0), ss.MapBack(script_span.text_bytes), result.probability));
307
314
  }
308
315
 
309
316
  // Sort the languages based on the number of bytes associated with them.
@@ -329,6 +336,7 @@ NNetLanguageIdentifier::FindTopNMostFreqLangs(const string &text,
329
336
  result.probability = stats.prob_sum / stats.byte_sum;
330
337
  result.proportion = stats.byte_sum / byte_sum;
331
338
  result.is_reliable = ResultIsReliable(language, result.probability);
339
+ result.byte_ranges = stats.byte_ranges;
332
340
  results.push_back(result);
333
341
  }
334
342
 
@@ -44,6 +44,19 @@ class LanguageIdEmbeddingFeatureExtractor
44
44
  // Class for detecting the language of a document.
45
45
  class NNetLanguageIdentifier {
46
46
  public:
47
+ // Holds probability that Span, specified by start/end indices, is a given
48
+ // language. The language is not stored here; it can be found in Result, which
49
+ // holds a vector of SpanInfo.
50
+ struct SpanInfo {
51
+ SpanInfo(int start_index_val, int end_index_val, float probability_val)
52
+ : start_index(start_index_val),
53
+ end_index(end_index_val),
54
+ probability(probability_val) {}
55
+ int start_index = -1;
56
+ int end_index = -1;
57
+ float probability = 0.0;
58
+ };
59
+
47
60
  // Information about a predicted language.
48
61
  struct Result {
49
62
  string language = kUnknown;
@@ -53,6 +66,9 @@ class NNetLanguageIdentifier {
53
66
  // Proportion of bytes associated with the language. If FindLanguage is
54
67
  // called, this variable is set to 1.
55
68
  float proportion = 0.0;
69
+
70
+ // Specifies the byte ranges that |language| applies to.
71
+ std::vector<SpanInfo> byte_ranges;
56
72
  };
57
73
 
58
74
  NNetLanguageIdentifier();
Binary file
@@ -1,4 +1,4 @@
1
- /* Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
1
+ /* Copyright 2017 Akihiko Odaki <akihiko.odaki@gmail.com>
2
2
  All Rights Reserved.
3
3
 
4
4
  Licensed under the Apache License, Version 2.0 (the "License");
@@ -26,42 +26,90 @@ limitations under the License.
26
26
  #define EXPORT __attribute__ ((visibility ("default")))
27
27
  #endif
28
28
 
29
- struct NNetLanguageIdentifier {
30
- chrome_lang_id::NNetLanguageIdentifier context;
31
- std::string language;
32
- };
33
-
34
29
  struct Result {
35
30
  struct {
36
31
  const char *data;
37
32
  std::size_t size;
38
33
  } language;
34
+ struct {
35
+ const chrome_lang_id::NNetLanguageIdentifier::SpanInfo *data;
36
+ std::size_t size;
37
+ } byte_ranges;
39
38
  float probability;
40
39
  float proportion;
41
40
  bool is_reliable;
42
41
  };
43
42
 
43
+ struct OwningResult {
44
+ OwningResult(chrome_lang_id::NNetLanguageIdentifier::Result&& result) {
45
+ references.language = std::move(result.language);
46
+ references.byte_ranges = std::move(result.byte_ranges);
47
+ plain.language.data = references.language.data();
48
+ plain.language.size = references.language.size();
49
+ plain.byte_ranges.data = references.byte_ranges.data();
50
+ plain.byte_ranges.size = references.byte_ranges.size();
51
+ plain.probability = result.probability;
52
+ plain.proportion = result.proportion;
53
+ plain.is_reliable = result.is_reliable;
54
+ }
55
+
56
+ Result plain;
57
+ struct {
58
+ std::string language;
59
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::SpanInfo> byte_ranges;
60
+ } references;
61
+ };
62
+
44
63
  extern "C" {
45
- EXPORT Result NNetLanguageIdentifier_find_language(void *pointer,
46
- const char *data,
47
- std::size_t size) {
48
- auto instance = static_cast<NNetLanguageIdentifier *>(pointer);
49
- auto result = instance->context.FindLanguage(std::string(data, size));
50
- instance->language = std::move(result.language);
51
-
52
- return Result {
53
- { instance->language.data(), instance->language.size() },
54
- result.probability,
55
- result.proportion,
56
- result.is_reliable
57
- };
64
+ EXPORT OwningResult *NNetLanguageIdentifier_find_language(
65
+ chrome_lang_id::NNetLanguageIdentifier *instance,
66
+ const char *data,
67
+ std::size_t size) {
68
+ return new OwningResult(instance->FindLanguage(std::string(data, size)));
58
69
  }
59
70
 
60
- EXPORT void delete_NNetLanguageIdentifier(void *pointer) {
61
- delete static_cast<NNetLanguageIdentifier *>(pointer);
71
+ EXPORT std::vector<chrome_lang_id::NNetLanguageIdentifier::Result>*
72
+ NNetLanguageIdentifier_find_top_n_most_freq_langs(
73
+ chrome_lang_id::NNetLanguageIdentifier *instance,
74
+ const char *data, std::size_t size, int num_langs) {
75
+ std::string text(data, size);
76
+ return new auto(instance->FindTopNMostFreqLangs(text, num_langs));
62
77
  }
63
78
 
64
- EXPORT void *new_NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes) {
65
- return new NNetLanguageIdentifier{{min_num_bytes, max_num_bytes}, {}};
79
+ EXPORT void delete_NNetLanguageIdentifier(
80
+ chrome_lang_id::NNetLanguageIdentifier *pointer) {
81
+ delete pointer;
82
+ }
83
+
84
+ EXPORT void delete_result(OwningResult *pointer) {
85
+ delete pointer;
86
+ }
87
+
88
+ EXPORT void delete_results(
89
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> *pointer) {
90
+ delete pointer;
91
+ }
92
+
93
+ EXPORT chrome_lang_id::NNetLanguageIdentifier *new_NNetLanguageIdentifier(
94
+ int min_num_bytes, int max_num_bytes) {
95
+ return new chrome_lang_id::NNetLanguageIdentifier(
96
+ min_num_bytes, max_num_bytes);
97
+ }
98
+
99
+ EXPORT Result refer_to_nth_result(
100
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> *results,
101
+ std::size_t index) {
102
+ Result c;
103
+ auto& cc = (*results)[index];
104
+
105
+ c.language.data = cc.language.data();
106
+ c.language.size = cc.language.size();
107
+ c.byte_ranges.data = cc.byte_ranges.data();
108
+ c.byte_ranges.size = cc.byte_ranges.size();
109
+ c.probability = cc.probability;
110
+ c.proportion = cc.proportion;
111
+ c.is_reliable = cc.is_reliable;
112
+
113
+ return c;
66
114
  }
67
115
  }
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/ext/cld3/utils.o ADDED
Binary file
Binary file
data/lib/cld3.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # File including an implementation of CLD3 module. Some documentations are
2
2
  # extracted from ext/cld3/ext/src/nnet_language_identifier.h.
3
3
  #
4
- # Copyright 2017 Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
4
+ # Copyright 2017 Akihiko Odaki <akihiko.odaki@gmail.com>
5
5
  # All Rights Reserved.
6
6
  #
7
7
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -49,10 +49,15 @@ module CLD3
49
49
  # This is Numeric object.
50
50
  RELIABILITY_HR_BS_THRESHOLD = 0.5
51
51
 
52
+ # Holds probability that Span, specified by start/end indices, is a given
53
+ # language. The language is not stored here; it can be found in Result, which
54
+ # holds an Array of SpanInfo.
55
+ SpanInfo = Struct.new(:start_index, :end_index, :probability)
56
+
52
57
  # Information about a predicted language.
53
58
  # This is an instance of Struct with the following members:
54
59
  #
55
- # [language] This is symbol or nil.
60
+ # [language] This is symbol.
56
61
  #
57
62
  # [probability] Language probability. This is Numeric object.
58
63
  #
@@ -61,33 +66,95 @@ module CLD3
61
66
  # [proportion] Proportion of bytes associated with the language. If
62
67
  # #find_language is called, this variable is set to 1.
63
68
  # This is Numeric object.
64
- Result = Struct.new("Result", :language, :probability, :reliable?, :proportion)
69
+ #
70
+ # [byte_ranges] Specifies the byte ranges in UTF-8 that |language| applies to.
71
+ # This is an Array of SpanInfo.
72
+ Result = Struct.new(:language, :probability, :reliable?, :proportion, :byte_ranges)
65
73
 
66
74
  # The arguments are two Numeric objects.
67
- def initialize(minNumBytes = MIN_NUM_BYTES_TO_CONSIDER, maxNumBytes = MAX_NUM_BYTES_TO_CONSIDER)
68
- @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(minNumBytes, maxNumBytes))
75
+ def initialize(min_num_bytes = MIN_NUM_BYTES_TO_CONSIDER, max_num_bytes = MAX_NUM_BYTES_TO_CONSIDER)
76
+ @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(min_num_bytes, max_num_bytes))
69
77
  end
70
78
 
71
79
  # Finds the most likely language for the given text, along with additional
72
80
  # information (e.g., probability). The prediction is based on the first N
73
81
  # bytes where N is the minimum between the number of interchange valid UTF8
74
82
  # bytes and +max_num_bytes_+. If N is less than +min_num_bytes_+ long, then
75
- # this function returns nil as language.
83
+ # this function returns nil.
76
84
  # The argument is a String object.
77
85
  # The returned value of this function is an instance of Result.
78
86
  def find_language(text)
79
87
  text_utf8 = text.encode(Encoding::UTF_8)
80
88
  pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
81
- pointer.put_bytes(0, text_utf8)
82
89
 
83
- cc_result = Unstable.NNetLanguageIdentifier_find_language(@cc, pointer, text_utf8.bytesize)
84
- language = cc_result[:language_data].read_bytes(cc_result[:language_size])
90
+ begin
91
+ pointer.put_bytes(0, text_utf8)
92
+
93
+ result = Unstable.NNetLanguageIdentifier_find_language(@cc, pointer, text_utf8.bytesize)
94
+ begin
95
+ convert_result Unstable::NNetLanguageIdentifier::Result.new(result)
96
+ ensure
97
+ Unstable.delete_result result
98
+ end
99
+ ensure
100
+ pointer.free
101
+ end
102
+ end
103
+
104
+ # Splits the input text (up to the first byte, if any, that is not
105
+ # interchange valid UTF8) into spans based on the script, predicts a language
106
+ # for each span, and returns a vector storing the top num_langs most frequent
107
+ # languages along with additional information (e.g., proportions). The number
108
+ # of bytes considered for each span is the minimum between the size of the
109
+ # span and +max_num_bytes_+. If more languages are requested than what is
110
+ # available in the input, then the number of the returned elements will be
111
+ # the number of the latter. Also, if the size of the span is less than
112
+ # +min_num_bytes_+ long, then the span is skipped. If the input text is too
113
+ # long, only the first +MAX_NUM_INPUT_BYTES_TO_CONSIDER+ bytes are processed.
114
+ # The first argument is a String object.
115
+ # The second argument is a Numeric object.
116
+ # The returned value of this function is an Array of Result instances.
117
+ def find_top_n_most_freq_langs(text, num_langs)
118
+ text_utf8 = text.encode(Encoding::UTF_8)
119
+ pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
120
+
121
+ begin
122
+ pointer.put_bytes(0, text_utf8)
123
+
124
+ results = Unstable.NNetLanguageIdentifier_find_top_n_most_freq_langs(@cc, pointer, text_utf8.bytesize, num_langs)
125
+ begin
126
+ num_langs.times
127
+ .lazy
128
+ .map { |index| convert_result Unstable.refer_to_nth_result(results, index) }
129
+ .take_while { |result| !result.nil? }
130
+ .to_a
131
+ ensure
132
+ Unstable.delete_results results
133
+ end
134
+ ensure
135
+ pointer.free
136
+ end
137
+ end
138
+
139
+ private
140
+
141
+ def convert_result(result)
142
+ language = result[:language_data].read_bytes(result[:language_size])
143
+ return nil if language == "und"
144
+
145
+ cursor = result[:byte_ranges_data]
146
+ byte_ranges = result[:byte_ranges_size].times.map do
147
+ info = Unstable::NNetLanguageIdentifier::SpanInfo.new(cursor)
148
+ cursor += Unstable::NNetLanguageIdentifier::SpanInfo.size
149
+ SpanInfo.new(info[:start_index], info[:end_index], info[:probability])
150
+ end
85
151
 
86
152
  Result.new(
87
- language == "und" ? nil : language.to_sym,
88
- cc_result[:probability],
89
- cc_result[:reliable?],
90
- cc_result[:proportion])
153
+ language.to_sym,
154
+ result[:probability],
155
+ result[:reliable?],
156
+ result[:proportion],
157
+ byte_ranges)
91
158
  end
92
159
  end
93
160
 
@@ -112,7 +179,7 @@ module CLD3
112
179
  module Unstable
113
180
  extend FFI::Library
114
181
 
115
- ffi_lib File.join(File.expand_path(File.dirname(__FILE__)), "..", "ext", "cld3", "libcld3." + RbConfig::CONFIG["DLEXT"])
182
+ ffi_lib File.join(__dir__, "..", "ext", "cld3", "libcld3." + RbConfig::CONFIG["DLEXT"])
116
183
 
117
184
  module NNetLanguageIdentifier
118
185
  class Pointer < FFI::AutoPointer
@@ -121,17 +188,30 @@ module CLD3
121
188
  end
122
189
  end
123
190
 
191
+ class SpanInfo < FFI::Struct
192
+ layout :start_index, :int, :end_index, :int, :probability, :float
193
+ end
194
+
124
195
  class Result < FFI::Struct
125
- layout :language_data, :pointer, :language_size, :size_t, :probability, :float, :proportion, :float, :reliable?, :bool
196
+ layout :language_data, :pointer, :language_size, :size_t, :byte_ranges_data, :pointer, :byte_ranges_size, :size_t, :probability, :float, :proportion, :float, :reliable?, :bool
126
197
  end
127
198
  end
128
199
 
129
200
  attach_function :delete_NNetLanguageIdentifier, [ :pointer ], :void
130
201
 
202
+ attach_function :delete_result, [ :pointer ], :void
203
+
204
+ attach_function :delete_results, [ :pointer ], :void
205
+
131
206
  attach_function :new_NNetLanguageIdentifier, [ :int, :int ], :pointer
132
207
 
208
+ attach_function :refer_to_nth_result, [ :pointer, :size_t ], NNetLanguageIdentifier::Result.by_value
209
+
133
210
  attach_function :NNetLanguageIdentifier_find_language,
134
- [ :pointer, :buffer_in, :size_t ], NNetLanguageIdentifier::Result.by_value
211
+ [ :pointer, :buffer_in, :size_t ], :pointer
212
+
213
+ attach_function :NNetLanguageIdentifier_find_top_n_most_freq_langs,
214
+ [ :pointer, :buffer_in, :size_t, :int ], :pointer
135
215
  end
136
216
 
137
217
  private_constant :Unstable
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cld3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.4
4
+ version: 3.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Akihiko Odaki
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-20 00:00:00.000000000 Z
11
+ date: 2021-04-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: 1.1.0
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: 1.11.0
22
+ version: 1.16.0
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: 1.1.0
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: 1.11.0
32
+ version: 1.16.0
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: rspec
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -39,7 +39,7 @@ dependencies:
39
39
  version: 3.0.0
40
40
  - - "<"
41
41
  - !ruby/object:Gem::Version
42
- version: 3.9.0
42
+ version: 3.11.0
43
43
  type: :development
44
44
  prerelease: false
45
45
  version_requirements: !ruby/object:Gem::Requirement
@@ -49,10 +49,10 @@ dependencies:
49
49
  version: 3.0.0
50
50
  - - "<"
51
51
  - !ruby/object:Gem::Version
52
- version: 3.9.0
52
+ version: 3.11.0
53
53
  description: Compact Language Detector v3 (CLD3) is a neural network model for language
54
54
  identification.
55
- email: akihiko.odaki.4i@stu.hosei.ac.jp
55
+ email: akihiko.odaki@gmail.com
56
56
  executables: []
57
57
  extensions:
58
58
  - ext/cld3/extconf.rb
@@ -63,76 +63,108 @@ files:
63
63
  - LICENSE_CLD3
64
64
  - README.md
65
65
  - cld3.gemspec
66
+ - ext/cld3/Makefile
66
67
  - ext/cld3/base.cc
67
68
  - ext/cld3/base.h
69
+ - ext/cld3/base.o
68
70
  - ext/cld3/casts.h
69
71
  - ext/cld3/embedding_feature_extractor.cc
70
72
  - ext/cld3/embedding_feature_extractor.h
73
+ - ext/cld3/embedding_feature_extractor.o
71
74
  - ext/cld3/embedding_network.cc
72
75
  - ext/cld3/embedding_network.h
76
+ - ext/cld3/embedding_network.o
73
77
  - ext/cld3/embedding_network_params.h
74
78
  - ext/cld3/extconf.rb
75
79
  - ext/cld3/feature_extractor.cc
76
80
  - ext/cld3/feature_extractor.h
81
+ - ext/cld3/feature_extractor.o
82
+ - ext/cld3/feature_extractor.pb.o
77
83
  - ext/cld3/feature_extractor.proto
78
84
  - ext/cld3/feature_types.cc
79
85
  - ext/cld3/feature_types.h
86
+ - ext/cld3/feature_types.o
80
87
  - ext/cld3/fixunicodevalue.cc
81
88
  - ext/cld3/fixunicodevalue.h
89
+ - ext/cld3/fixunicodevalue.o
82
90
  - ext/cld3/float16.h
83
91
  - ext/cld3/fml_parser.cc
84
92
  - ext/cld3/fml_parser.h
93
+ - ext/cld3/fml_parser.o
85
94
  - ext/cld3/generated_entities.cc
95
+ - ext/cld3/generated_entities.o
86
96
  - ext/cld3/generated_ulscript.cc
87
97
  - ext/cld3/generated_ulscript.h
98
+ - ext/cld3/generated_ulscript.o
88
99
  - ext/cld3/getonescriptspan.cc
89
100
  - ext/cld3/getonescriptspan.h
101
+ - ext/cld3/getonescriptspan.o
90
102
  - ext/cld3/integral_types.h
91
103
  - ext/cld3/lang_id_nn_params.cc
92
104
  - ext/cld3/lang_id_nn_params.h
105
+ - ext/cld3/lang_id_nn_params.o
93
106
  - ext/cld3/language_identifier_features.cc
94
107
  - ext/cld3/language_identifier_features.h
108
+ - ext/cld3/language_identifier_features.o
109
+ - ext/cld3/libcld3.def
110
+ - ext/cld3/libcld3.so
111
+ - ext/cld3/mkmf.log
95
112
  - ext/cld3/nnet_language_identifier.cc
96
113
  - ext/cld3/nnet_language_identifier.h
114
+ - ext/cld3/nnet_language_identifier.o
97
115
  - ext/cld3/nnet_language_identifier_c.cc
116
+ - ext/cld3/nnet_language_identifier_c.o
98
117
  - ext/cld3/offsetmap.cc
99
118
  - ext/cld3/offsetmap.h
119
+ - ext/cld3/offsetmap.o
100
120
  - ext/cld3/port.h
101
121
  - ext/cld3/registry.cc
102
122
  - ext/cld3/registry.h
123
+ - ext/cld3/registry.o
103
124
  - ext/cld3/relevant_script_feature.cc
104
125
  - ext/cld3/relevant_script_feature.h
126
+ - ext/cld3/relevant_script_feature.o
105
127
  - ext/cld3/script_detector.h
128
+ - ext/cld3/sentence.pb.o
106
129
  - ext/cld3/sentence.proto
107
130
  - ext/cld3/sentence_features.cc
108
131
  - ext/cld3/sentence_features.h
132
+ - ext/cld3/sentence_features.o
109
133
  - ext/cld3/simple_adder.h
110
134
  - ext/cld3/stringpiece.h
111
135
  - ext/cld3/task_context.cc
112
136
  - ext/cld3/task_context.h
137
+ - ext/cld3/task_context.o
113
138
  - ext/cld3/task_context_params.cc
114
139
  - ext/cld3/task_context_params.h
140
+ - ext/cld3/task_context_params.o
141
+ - ext/cld3/task_spec.pb.o
115
142
  - ext/cld3/task_spec.proto
116
143
  - ext/cld3/text_processing.cc
117
144
  - ext/cld3/text_processing.h
145
+ - ext/cld3/text_processing.o
118
146
  - ext/cld3/unicodetext.cc
119
147
  - ext/cld3/unicodetext.h
148
+ - ext/cld3/unicodetext.o
120
149
  - ext/cld3/utf8acceptinterchange.h
121
150
  - ext/cld3/utf8prop_lettermarkscriptnum.h
122
151
  - ext/cld3/utf8repl_lettermarklower.h
123
152
  - ext/cld3/utf8scannot_lettermarkspecial.h
124
153
  - ext/cld3/utf8statetable.cc
125
154
  - ext/cld3/utf8statetable.h
155
+ - ext/cld3/utf8statetable.o
126
156
  - ext/cld3/utils.cc
127
157
  - ext/cld3/utils.h
158
+ - ext/cld3/utils.o
128
159
  - ext/cld3/workspace.cc
129
160
  - ext/cld3/workspace.h
161
+ - ext/cld3/workspace.o
130
162
  - lib/cld3.rb
131
163
  homepage: https://github.com/akihikodaki/cld3-ruby
132
164
  licenses:
133
165
  - Apache-2.0
134
166
  metadata: {}
135
- post_install_message:
167
+ post_install_message:
136
168
  rdoc_options: []
137
169
  require_paths:
138
170
  - lib
@@ -140,18 +172,18 @@ required_ruby_version: !ruby/object:Gem::Requirement
140
172
  requirements:
141
173
  - - ">="
142
174
  - !ruby/object:Gem::Version
143
- version: 2.3.0
175
+ version: 2.6.0
144
176
  - - "<"
145
177
  - !ruby/object:Gem::Version
146
- version: 2.7.0
178
+ version: 3.1.0
147
179
  required_rubygems_version: !ruby/object:Gem::Requirement
148
180
  requirements:
149
181
  - - ">="
150
182
  - !ruby/object:Gem::Version
151
183
  version: '0'
152
184
  requirements: []
153
- rubygems_version: 3.0.3
154
- signing_key:
185
+ rubygems_version: 3.1.4
186
+ signing_key:
155
187
  specification_version: 4
156
188
  summary: Compact Language Detector v3 (CLD3)
157
189
  test_files: []