cld3 3.2.4 → 3.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/cld3.gemspec +4 -4
- data/ext/cld3/ext/CMakeLists.txt +69 -0
- data/ext/cld3/ext/CONTRIBUTING.md +26 -0
- data/{LICENSE_CLD3 → ext/cld3/ext/LICENSE} +0 -0
- data/ext/cld3/ext/README.md +73 -0
- data/ext/cld3/ext/misc/myprotobuf.cmake +58 -0
- data/ext/cld3/ext/model.png +0 -0
- data/ext/cld3/ext/src/BUILD.gn +133 -0
- data/ext/cld3/ext/src/DEPS +4 -0
- data/ext/cld3/{base.cc → ext/src/base.cc} +0 -0
- data/ext/cld3/{base.h → ext/src/base.h} +0 -0
- data/ext/cld3/{casts.h → ext/src/casts.h} +0 -0
- data/ext/cld3/{embedding_feature_extractor.cc → ext/src/embedding_feature_extractor.cc} +0 -0
- data/ext/cld3/{embedding_feature_extractor.h → ext/src/embedding_feature_extractor.h} +0 -0
- data/ext/cld3/{embedding_network.cc → ext/src/embedding_network.cc} +0 -0
- data/ext/cld3/{embedding_network.h → ext/src/embedding_network.h} +0 -0
- data/ext/cld3/{embedding_network_params.h → ext/src/embedding_network_params.h} +0 -0
- data/ext/cld3/{feature_extractor.cc → ext/src/feature_extractor.cc} +0 -0
- data/ext/cld3/{feature_extractor.h → ext/src/feature_extractor.h} +0 -0
- data/ext/cld3/{feature_extractor.proto → ext/src/feature_extractor.proto} +0 -0
- data/ext/cld3/{feature_types.cc → ext/src/feature_types.cc} +0 -0
- data/ext/cld3/{feature_types.h → ext/src/feature_types.h} +0 -0
- data/ext/cld3/{float16.h → ext/src/float16.h} +0 -0
- data/ext/cld3/{fml_parser.cc → ext/src/fml_parser.cc} +0 -0
- data/ext/cld3/{fml_parser.h → ext/src/fml_parser.h} +0 -0
- data/ext/cld3/{lang_id_nn_params.cc → ext/src/lang_id_nn_params.cc} +0 -0
- data/ext/cld3/{lang_id_nn_params.h → ext/src/lang_id_nn_params.h} +0 -0
- data/ext/cld3/{language_identifier_features.cc → ext/src/language_identifier_features.cc} +0 -0
- data/ext/cld3/{language_identifier_features.h → ext/src/language_identifier_features.h} +0 -0
- data/ext/cld3/ext/src/language_identifier_features_test.cc +261 -0
- data/ext/cld3/ext/src/language_identifier_main.cc +54 -0
- data/ext/cld3/ext/src/nnet_lang_id_test.cc +254 -0
- data/ext/cld3/ext/src/nnet_lang_id_test_data.cc +529 -0
- data/ext/cld3/ext/src/nnet_lang_id_test_data.h +117 -0
- data/ext/cld3/{nnet_language_identifier.cc → ext/src/nnet_language_identifier.cc} +8 -0
- data/ext/cld3/{nnet_language_identifier.h → ext/src/nnet_language_identifier.h} +16 -0
- data/ext/cld3/{registry.cc → ext/src/registry.cc} +0 -0
- data/ext/cld3/{registry.h → ext/src/registry.h} +0 -0
- data/ext/cld3/{relevant_script_feature.cc → ext/src/relevant_script_feature.cc} +0 -0
- data/ext/cld3/{relevant_script_feature.h → ext/src/relevant_script_feature.h} +0 -0
- data/ext/cld3/ext/src/relevant_script_feature_test.cc +259 -0
- data/ext/cld3/{script_detector.h → ext/src/script_detector.h} +0 -0
- data/ext/cld3/ext/src/script_detector_test.cc +161 -0
- data/ext/cld3/ext/src/script_span/README.md +11 -0
- data/ext/cld3/{fixunicodevalue.cc → ext/src/script_span/fixunicodevalue.cc} +0 -0
- data/ext/cld3/{fixunicodevalue.h → ext/src/script_span/fixunicodevalue.h} +0 -0
- data/ext/cld3/{generated_entities.cc → ext/src/script_span/generated_entities.cc} +0 -0
- data/ext/cld3/{generated_ulscript.cc → ext/src/script_span/generated_ulscript.cc} +0 -0
- data/ext/cld3/{generated_ulscript.h → ext/src/script_span/generated_ulscript.h} +0 -0
- data/ext/cld3/{getonescriptspan.cc → ext/src/script_span/getonescriptspan.cc} +0 -0
- data/ext/cld3/{getonescriptspan.h → ext/src/script_span/getonescriptspan.h} +1 -1
- data/ext/cld3/ext/src/script_span/getonescriptspan_test.cc +135 -0
- data/ext/cld3/{integral_types.h → ext/src/script_span/integral_types.h} +0 -0
- data/ext/cld3/{offsetmap.cc → ext/src/script_span/offsetmap.cc} +0 -0
- data/ext/cld3/{offsetmap.h → ext/src/script_span/offsetmap.h} +0 -0
- data/ext/cld3/{port.h → ext/src/script_span/port.h} +0 -0
- data/ext/cld3/{stringpiece.h → ext/src/script_span/stringpiece.h} +0 -0
- data/ext/cld3/{text_processing.cc → ext/src/script_span/text_processing.cc} +0 -0
- data/ext/cld3/{text_processing.h → ext/src/script_span/text_processing.h} +0 -0
- data/ext/cld3/{utf8acceptinterchange.h → ext/src/script_span/utf8acceptinterchange.h} +0 -0
- data/ext/cld3/{utf8prop_lettermarkscriptnum.h → ext/src/script_span/utf8prop_lettermarkscriptnum.h} +0 -0
- data/ext/cld3/{utf8repl_lettermarklower.h → ext/src/script_span/utf8repl_lettermarklower.h} +0 -0
- data/ext/cld3/{utf8scannot_lettermarkspecial.h → ext/src/script_span/utf8scannot_lettermarkspecial.h} +0 -0
- data/ext/cld3/{utf8statetable.cc → ext/src/script_span/utf8statetable.cc} +0 -0
- data/ext/cld3/{utf8statetable.h → ext/src/script_span/utf8statetable.h} +0 -0
- data/ext/cld3/{sentence.proto → ext/src/sentence.proto} +0 -0
- data/ext/cld3/{sentence_features.cc → ext/src/sentence_features.cc} +0 -0
- data/ext/cld3/{sentence_features.h → ext/src/sentence_features.h} +0 -0
- data/ext/cld3/{simple_adder.h → ext/src/simple_adder.h} +0 -0
- data/ext/cld3/{task_context.cc → ext/src/task_context.cc} +0 -0
- data/ext/cld3/{task_context.h → ext/src/task_context.h} +0 -0
- data/ext/cld3/{task_context_params.cc → ext/src/task_context_params.cc} +0 -0
- data/ext/cld3/{task_context_params.h → ext/src/task_context_params.h} +0 -0
- data/ext/cld3/{task_spec.proto → ext/src/task_spec.proto} +0 -0
- data/ext/cld3/{unicodetext.cc → ext/src/unicodetext.cc} +0 -0
- data/ext/cld3/{unicodetext.h → ext/src/unicodetext.h} +0 -0
- data/ext/cld3/{utils.cc → ext/src/utils.cc} +0 -0
- data/ext/cld3/{utils.h → ext/src/utils.h} +0 -0
- data/ext/cld3/{workspace.cc → ext/src/workspace.cc} +0 -0
- data/ext/cld3/{workspace.h → ext/src/workspace.h} +0 -0
- metadata +87 -71
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c11344b17ebf6afb1108da299e132c5dda6a032c3d2e038ea4a476ff27d312f
|
4
|
+
data.tar.gz: 60c5d15ae88e10cb40629624e634960e1556c8032dcd48921c9d23b0514fb456
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfe48d2f1992204154b4d3a162f6c4a2ecf358d01f09eb0925be9a41fb968287544c84581458f11abd8d01ff54c74afb8b8a75d555da781c5b843868ca19e887
|
7
|
+
data.tar.gz: cc603eeaee27183aac35d223a01fc09342c314091d8ca0f25d54f051e4071853a8ba02fbd471ac575a3273919f8fc08497c539023e7be4fe72b416cb629b4ea5
|
data/cld3.gemspec
CHANGED
@@ -16,16 +16,16 @@
|
|
16
16
|
|
17
17
|
Gem::Specification.new do |gem|
|
18
18
|
gem.name = "cld3"
|
19
|
-
gem.version = "3.2.4"
|
19
|
+
gem.version = "3.2.5"
|
20
20
|
gem.summary = "Compact Language Detector v3 (CLD3)"
|
21
21
|
gem.description = "Compact Language Detector v3 (CLD3) is a neural network model for language identification."
|
22
22
|
gem.license = "Apache-2.0"
|
23
23
|
gem.homepage = "https://github.com/akihikodaki/cld3-ruby"
|
24
24
|
gem.author = "Akihiko Odaki"
|
25
25
|
gem.email = "akihiko.odaki.4i@stu.hosei.ac.jp"
|
26
|
-
gem.required_ruby_version = [ ">= 2.3.0", "< 2.
|
27
|
-
gem.add_dependency "ffi", [ ">= 1.1.0", "< 1.
|
28
|
-
gem.add_development_dependency "rspec", [ ">=3.0.0", "< 3.
|
26
|
+
gem.required_ruby_version = [ ">= 2.3.0", "< 2.8.0" ]
|
27
|
+
gem.add_dependency "ffi", [ ">= 1.1.0", "< 1.12.0" ]
|
28
|
+
gem.add_development_dependency "rspec", [ ">=3.0.0", "< 3.10.0" ]
|
29
29
|
gem.files = Dir[
|
30
30
|
"Gemfile", "LICENSE", "LICENSE_CLD3", "README.md",
|
31
31
|
"cld3.gemspec", "ext/**/*", "lib/**/*"
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# This cmake script only builds a static cld3 lib and the unittests.
|
2
|
+
|
3
|
+
project(cld3)
|
4
|
+
|
5
|
+
# Old versions of cmake don't search/find protobuf lite
|
6
|
+
cmake_minimum_required(VERSION 3.9)
|
7
|
+
|
8
|
+
find_package(Protobuf REQUIRED)
|
9
|
+
message(STATUS "Protobuf_FOUND= ${Protobuf_FOUND}")
|
10
|
+
message(STATUS "Protobuf_VERSION= ${Protobuf_VERSION}")
|
11
|
+
message(WARNING "Protobuf 2.5 and CLD3 seems happy together. This script does NOT check if your verison of protobuf is compatible.")
|
12
|
+
message(STATUS "Protobuf_LIBRARIES= ${Protobuf_LIBRARIES}")
|
13
|
+
message(STATUS "Protobuf_LITE_LIBRARIES= ${Protobuf_LITE_LIBRARIES}") # Usually /usr/lib64/libprotobuf-lite.so
|
14
|
+
|
15
|
+
# By default, protobuf_generate_cpp generates pb.* files directly in the cmake build dir.
|
16
|
+
# But CLD3 sources have been coded using hard-coded paths to cld_3/protos/*.pb.h.
|
17
|
+
# So *.pb.h must be output to cld_3/protos.
|
18
|
+
# For that, let's use a custom my_protobuf_generate_cpp:
|
19
|
+
include(${CMAKE_CURRENT_SOURCE_DIR}/misc/myprotobuf.cmake)
|
20
|
+
my_protobuf_generate_cpp(cld_3/protos PROTO_SRCS PROTO_HDRS src/feature_extractor.proto src/sentence.proto src/task_spec.proto)
|
21
|
+
message(STATUS "PROTO_HDRS= ${PROTO_HDRS}")
|
22
|
+
|
23
|
+
add_definitions(-fPIC) # Position Independent Code
|
24
|
+
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
|
25
|
+
add_definitions(-std=c++11) # Needed for std::to_string(), ...
|
26
|
+
|
27
|
+
include_directories(${CMAKE_CURRENT_BINARY_DIR}) # needed to include generated pb headers
|
28
|
+
|
29
|
+
add_library(${PROJECT_NAME}
|
30
|
+
${PROTO_SRCS} ${PROTO_HDRS}
|
31
|
+
src/base.cc
|
32
|
+
src/embedding_feature_extractor.cc
|
33
|
+
src/embedding_network.cc
|
34
|
+
src/feature_extractor.cc
|
35
|
+
src/feature_extractor.h
|
36
|
+
src/feature_types.cc
|
37
|
+
src/fml_parser.cc
|
38
|
+
src/language_identifier_features.cc
|
39
|
+
src/lang_id_nn_params.cc
|
40
|
+
src/nnet_language_identifier.cc
|
41
|
+
src/registry.cc
|
42
|
+
src/relevant_script_feature.cc
|
43
|
+
src/sentence_features.cc
|
44
|
+
src/task_context.cc
|
45
|
+
src/task_context_params.cc
|
46
|
+
src/unicodetext.cc
|
47
|
+
src/utils.cc
|
48
|
+
src/workspace.cc
|
49
|
+
|
50
|
+
src/script_span/generated_entities.cc
|
51
|
+
src/script_span/getonescriptspan.cc
|
52
|
+
src/script_span/getonescriptspan.h
|
53
|
+
src/script_span/getonescriptspan_test.cc
|
54
|
+
src/script_span/utf8statetable.cc
|
55
|
+
src/script_span/offsetmap.cc
|
56
|
+
src/script_span/text_processing.cc
|
57
|
+
src/script_span/text_processing.h
|
58
|
+
src/script_span/fixunicodevalue.cc
|
59
|
+
)
|
60
|
+
|
61
|
+
# unit tests exec:
|
62
|
+
add_executable(language_identifier_main src/language_identifier_main.cc)
|
63
|
+
target_link_libraries(language_identifier_main cld3 ${Protobuf_LITE_LIBRARIES})
|
64
|
+
|
65
|
+
add_executable(getonescriptspan_test src/script_span/getonescriptspan_test.cc)
|
66
|
+
target_link_libraries(getonescriptspan_test cld3 ${Protobuf_LITE_LIBRARIES})
|
67
|
+
|
68
|
+
add_executable(language_identifier_features_test src/language_identifier_features_test.cc)
|
69
|
+
target_link_libraries(language_identifier_features_test cld3 ${Protobuf_LITE_LIBRARIES})
|
@@ -0,0 +1,26 @@
|
|
1
|
+
Want to contribute? Great! First, read this page (including the small print at
|
2
|
+
the end).
|
3
|
+
|
4
|
+
### Before you contribute
|
5
|
+
Before we can use your code, you must sign the
|
6
|
+
[Google Individual Contributor License Agreement](https://cla.developers.google.com/about/google-individual)
|
7
|
+
(CLA), which you can do online. The CLA is necessary mainly because you own the
|
8
|
+
copyright to your changes, even after your contribution becomes part of our
|
9
|
+
codebase, so we need your permission to use and distribute your code. We also
|
10
|
+
need to be sure of various other things—for instance that you'll tell us if you
|
11
|
+
know that your code infringes on other people's patents. You don't have to sign
|
12
|
+
the CLA until after you've submitted your code for review and a member has
|
13
|
+
approved it, but you must do it before we can put your code into our codebase.
|
14
|
+
Before you start working on a larger contribution, you should get in touch with
|
15
|
+
us first through the issue tracker with your idea so that we can help out and
|
16
|
+
possibly guide you. Coordinating up front makes it much easier to avoid
|
17
|
+
frustration later on.
|
18
|
+
|
19
|
+
### Code reviews
|
20
|
+
All submissions, including submissions by project members, require review. We
|
21
|
+
use GitHub pull requests for this purpose.
|
22
|
+
|
23
|
+
### The small print
|
24
|
+
Contributions made by corporations are covered by a different agreement than
|
25
|
+
the one above, the
|
26
|
+
[Software Grant and Corporate Contributor License Agreement](https://cla.developers.google.com/about/google-corporate).
|
File without changes
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# Compact Language Detector v3 (CLD3)
|
2
|
+
|
3
|
+
* [Model](#model)
|
4
|
+
* [Installation](#installation)
|
5
|
+
* [Contact](#contact)
|
6
|
+
* [Credits](#credits)
|
7
|
+
|
8
|
+
### Model
|
9
|
+
|
10
|
+
CLD3 is a neural network model for language identification. This package
|
11
|
+
contains the inference code and a trained model. The inference code
|
12
|
+
extracts character ngrams from the input text and computes the fraction
|
13
|
+
of times each of them appears. For example, as shown in the figure below,
|
14
|
+
if the input text is "banana", then one of the extracted trigrams is "ana"
|
15
|
+
and the corresponding fraction is 2/4. The ngrams are hashed down to an id
|
16
|
+
within a small range, and each id is represented by a dense embedding vector
|
17
|
+
estimated during training.
|
18
|
+
|
19
|
+
The model averages the embeddings corresponding to each ngram type according
|
20
|
+
to the fractions, and the averaged embeddings are concatenated to produce
|
21
|
+
the embedding layer. The remaining components of the network are a hidden
|
22
|
+
(Rectified linear) layer and a softmax layer.
|
23
|
+
|
24
|
+
To get a language prediction for the input text, we simply perform a forward
|
25
|
+
pass through the network.
|
26
|
+
|
27
|
+

|
28
|
+
|
29
|
+
### Installation
|
30
|
+
CLD3 is designed to run in the Chrome browser, so it relies on code in
|
31
|
+
[Chromium](http://www.chromium.org/).
|
32
|
+
The steps for building and running the demo of the language detection model are:
|
33
|
+
|
34
|
+
- [check out](http://www.chromium.org/developers/how-tos/get-the-code) the
|
35
|
+
Chromium repository.
|
36
|
+
- copy the code to `//third_party/cld_3`
|
37
|
+
- Uncomment `language_identifier_main` executable in `src/BUILD.gn`.
|
38
|
+
- build and run the model using the commands:
|
39
|
+
|
40
|
+
```shell
|
41
|
+
gn gen out/Default
|
42
|
+
ninja -C out/Default third_party/cld_3/src/src:language_identifier_main
|
43
|
+
out/Default/language_identifier_main
|
44
|
+
```
|
45
|
+
### Bugs and Feature Requests
|
46
|
+
|
47
|
+
Open a [GitHub issue](https://github.com/google/cld3/issues) for this repository to file bugs and feature requests.
|
48
|
+
|
49
|
+
### Announcements and Discussion
|
50
|
+
|
51
|
+
For announcements regarding major updates, as well as general discussion, please subscribe to:
|
52
|
+
[cld3-users@googlegroups.com](https://groups.google.com/forum/#!forum/cld3-users)
|
53
|
+
|
54
|
+
### Credits
|
55
|
+
|
56
|
+
Original authors of the code in this package include (in alphabetical order):
|
57
|
+
|
58
|
+
* Alex Salcianu
|
59
|
+
* Andy Golding
|
60
|
+
* Anton Bakalov
|
61
|
+
* Chris Alberti
|
62
|
+
* Daniel Andor
|
63
|
+
* David Weiss
|
64
|
+
* Emily Pitler
|
65
|
+
* Greg Coppola
|
66
|
+
* Jason Riesa
|
67
|
+
* Kuzman Ganchev
|
68
|
+
* Michael Ringgaard
|
69
|
+
* Nan Hua
|
70
|
+
* Ryan McDonald
|
71
|
+
* Slav Petrov
|
72
|
+
* Stefan Istrate
|
73
|
+
* Terry Koo
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Special PROTOBUF_GENERATE_CPP which allows setting the output folder:
|
2
|
+
# From https://stackoverflow.com/users/1600278/akira-okumura
|
3
|
+
|
4
|
+
function(MY_PROTOBUF_GENERATE_CPP PATH SRCS HDRS)
|
5
|
+
if(NOT ARGN)
|
6
|
+
message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
|
7
|
+
return()
|
8
|
+
endif()
|
9
|
+
|
10
|
+
if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
|
11
|
+
# Create an include path for each file specified
|
12
|
+
foreach(FIL ${ARGN})
|
13
|
+
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
|
14
|
+
get_filename_component(ABS_PATH ${ABS_FIL} PATH)
|
15
|
+
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
|
16
|
+
if(${_contains_already} EQUAL -1)
|
17
|
+
list(APPEND _protobuf_include_path -I ${ABS_PATH})
|
18
|
+
endif()
|
19
|
+
endforeach()
|
20
|
+
else()
|
21
|
+
set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
|
22
|
+
endif()
|
23
|
+
|
24
|
+
if(DEFINED PROTOBUF_IMPORT_DIRS)
|
25
|
+
foreach(DIR ${PROTOBUF_IMPORT_DIRS})
|
26
|
+
get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
|
27
|
+
list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
|
28
|
+
if(${_contains_already} EQUAL -1)
|
29
|
+
list(APPEND _protobuf_include_path -I ${ABS_PATH})
|
30
|
+
endif()
|
31
|
+
endforeach()
|
32
|
+
endif()
|
33
|
+
|
34
|
+
set(${SRCS})
|
35
|
+
set(${HDRS})
|
36
|
+
foreach(FIL ${ARGN})
|
37
|
+
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
|
38
|
+
get_filename_component(FIL_WE ${FIL} NAME_WE)
|
39
|
+
|
40
|
+
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.cc")
|
41
|
+
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.h")
|
42
|
+
|
43
|
+
execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/${PATH})
|
44
|
+
|
45
|
+
add_custom_command(
|
46
|
+
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.cc"
|
47
|
+
"${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.h"
|
48
|
+
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
|
49
|
+
ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR}/${PATH} ${_protobuf_include_path} ${ABS_FIL}
|
50
|
+
DEPENDS ${ABS_FIL}
|
51
|
+
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
|
52
|
+
VERBATIM )
|
53
|
+
endforeach()
|
54
|
+
|
55
|
+
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
|
56
|
+
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
|
57
|
+
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
|
58
|
+
endfunction()
|
Binary file
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
#==============================================================================
|
15
|
+
|
16
|
+
import("//third_party/protobuf/proto_library.gni")
|
17
|
+
|
18
|
+
proto_library("protos") {
|
19
|
+
sources = [
|
20
|
+
"feature_extractor.proto",
|
21
|
+
"sentence.proto",
|
22
|
+
"task_spec.proto",
|
23
|
+
]
|
24
|
+
proto_out_dir = "cld_3/protos"
|
25
|
+
}
|
26
|
+
|
27
|
+
static_library("cld_3") {
|
28
|
+
sources = [
|
29
|
+
"base.cc",
|
30
|
+
"base.h",
|
31
|
+
"casts.h",
|
32
|
+
"embedding_feature_extractor.cc",
|
33
|
+
"embedding_feature_extractor.h",
|
34
|
+
"embedding_network.cc",
|
35
|
+
"embedding_network.h",
|
36
|
+
"embedding_network_params.h",
|
37
|
+
"feature_extractor.cc",
|
38
|
+
"feature_extractor.h",
|
39
|
+
"feature_types.cc",
|
40
|
+
"feature_types.h",
|
41
|
+
"float16.h",
|
42
|
+
"fml_parser.cc",
|
43
|
+
"fml_parser.h",
|
44
|
+
"language_identifier_features.cc",
|
45
|
+
"language_identifier_features.h",
|
46
|
+
"lang_id_nn_params.cc",
|
47
|
+
"lang_id_nn_params.h",
|
48
|
+
"nnet_language_identifier.cc",
|
49
|
+
"nnet_language_identifier.h",
|
50
|
+
"registry.cc",
|
51
|
+
"registry.h",
|
52
|
+
"relevant_script_feature.cc",
|
53
|
+
"relevant_script_feature.h",
|
54
|
+
"script_detector.h",
|
55
|
+
"sentence_features.cc",
|
56
|
+
"sentence_features.h",
|
57
|
+
"simple_adder.h",
|
58
|
+
"script_span/fixunicodevalue.cc",
|
59
|
+
"script_span/fixunicodevalue.h",
|
60
|
+
"script_span/generated_entities.cc",
|
61
|
+
"script_span/generated_ulscript.cc",
|
62
|
+
"script_span/generated_ulscript.h",
|
63
|
+
"script_span/getonescriptspan.cc",
|
64
|
+
"script_span/getonescriptspan.h",
|
65
|
+
"script_span/integral_types.h",
|
66
|
+
"script_span/offsetmap.cc",
|
67
|
+
"script_span/offsetmap.h",
|
68
|
+
"script_span/port.h",
|
69
|
+
"script_span/stringpiece.h",
|
70
|
+
"script_span/text_processing.cc",
|
71
|
+
"script_span/text_processing.h",
|
72
|
+
"script_span/utf8acceptinterchange.h",
|
73
|
+
"script_span/utf8prop_lettermarkscriptnum.h",
|
74
|
+
"script_span/utf8repl_lettermarklower.h",
|
75
|
+
"script_span/utf8scannot_lettermarkspecial.h",
|
76
|
+
"script_span/utf8statetable.cc",
|
77
|
+
"script_span/utf8statetable.h",
|
78
|
+
"task_context.cc",
|
79
|
+
"task_context.h",
|
80
|
+
"task_context_params.cc",
|
81
|
+
"task_context_params.h",
|
82
|
+
"unicodetext.cc",
|
83
|
+
"unicodetext.h",
|
84
|
+
"utils.cc",
|
85
|
+
"utils.h",
|
86
|
+
"workspace.cc",
|
87
|
+
"workspace.h",
|
88
|
+
]
|
89
|
+
public_deps = [
|
90
|
+
"//third_party/protobuf:protobuf_lite",
|
91
|
+
":protos",
|
92
|
+
]
|
93
|
+
}
|
94
|
+
|
95
|
+
# The executables below are functional. Uncomment to use.
|
96
|
+
|
97
|
+
#executable("language_identifier_main") {
|
98
|
+
# sources = [
|
99
|
+
# "language_identifier_main.cc",
|
100
|
+
# ]
|
101
|
+
# deps = [
|
102
|
+
# ":cld_3",
|
103
|
+
# ]
|
104
|
+
#}
|
105
|
+
|
106
|
+
#executable("getonescriptspan_test") {
|
107
|
+
# sources = [
|
108
|
+
# "script_span/getonescriptspan_test.cc",
|
109
|
+
# ]
|
110
|
+
# deps = [
|
111
|
+
# ":cld_3",
|
112
|
+
# ]
|
113
|
+
#}
|
114
|
+
|
115
|
+
#executable("language_identifier_features_test") {
|
116
|
+
# sources = [
|
117
|
+
# "language_identifier_features_test.cc",
|
118
|
+
# ]
|
119
|
+
# deps = [
|
120
|
+
# ":cld_3",
|
121
|
+
# ]
|
122
|
+
#}
|
123
|
+
|
124
|
+
#executable("nnet_lang_id_test") {
|
125
|
+
# sources = [
|
126
|
+
# "nnet_lang_id_test.cc",
|
127
|
+
# "nnet_lang_id_test_data.cc",
|
128
|
+
# "nnet_lang_id_test_data.h",
|
129
|
+
# ]
|
130
|
+
# deps = [
|
131
|
+
# ":cld_3",
|
132
|
+
# ]
|
133
|
+
#}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|