cld3 3.2.5 → 3.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. checksums.yaml +4 -4
  2. data/{ext/cld3/ext/LICENSE → LICENSE_CLD3} +0 -0
  3. data/cld3.gemspec +1 -1
  4. data/ext/cld3/Makefile +266 -0
  5. data/ext/cld3/{ext/src/base.cc → base.cc} +0 -0
  6. data/ext/cld3/{ext/src/base.h → base.h} +0 -0
  7. data/ext/cld3/base.o +0 -0
  8. data/ext/cld3/{ext/src/casts.h → casts.h} +0 -0
  9. data/ext/cld3/{ext/src/embedding_feature_extractor.cc → embedding_feature_extractor.cc} +0 -0
  10. data/ext/cld3/{ext/src/embedding_feature_extractor.h → embedding_feature_extractor.h} +0 -0
  11. data/ext/cld3/embedding_feature_extractor.o +0 -0
  12. data/ext/cld3/{ext/src/embedding_network.cc → embedding_network.cc} +0 -0
  13. data/ext/cld3/{ext/src/embedding_network.h → embedding_network.h} +0 -0
  14. data/ext/cld3/embedding_network.o +0 -0
  15. data/ext/cld3/{ext/src/embedding_network_params.h → embedding_network_params.h} +0 -0
  16. data/ext/cld3/{ext/src/feature_extractor.cc → feature_extractor.cc} +0 -0
  17. data/ext/cld3/{ext/src/feature_extractor.h → feature_extractor.h} +0 -0
  18. data/ext/cld3/feature_extractor.o +0 -0
  19. data/ext/cld3/feature_extractor.pb.o +0 -0
  20. data/ext/cld3/{ext/src/feature_extractor.proto → feature_extractor.proto} +0 -0
  21. data/ext/cld3/{ext/src/feature_types.cc → feature_types.cc} +0 -0
  22. data/ext/cld3/{ext/src/feature_types.h → feature_types.h} +0 -0
  23. data/ext/cld3/feature_types.o +0 -0
  24. data/ext/cld3/{ext/src/script_span/fixunicodevalue.cc → fixunicodevalue.cc} +0 -0
  25. data/ext/cld3/{ext/src/script_span/fixunicodevalue.h → fixunicodevalue.h} +0 -0
  26. data/ext/cld3/fixunicodevalue.o +0 -0
  27. data/ext/cld3/{ext/src/float16.h → float16.h} +0 -0
  28. data/ext/cld3/{ext/src/fml_parser.cc → fml_parser.cc} +0 -0
  29. data/ext/cld3/{ext/src/fml_parser.h → fml_parser.h} +0 -0
  30. data/ext/cld3/fml_parser.o +0 -0
  31. data/ext/cld3/{ext/src/script_span/generated_entities.cc → generated_entities.cc} +0 -0
  32. data/ext/cld3/generated_entities.o +0 -0
  33. data/ext/cld3/{ext/src/script_span/generated_ulscript.cc → generated_ulscript.cc} +0 -0
  34. data/ext/cld3/{ext/src/script_span/generated_ulscript.h → generated_ulscript.h} +0 -0
  35. data/ext/cld3/generated_ulscript.o +0 -0
  36. data/ext/cld3/{ext/src/script_span/getonescriptspan.cc → getonescriptspan.cc} +0 -0
  37. data/ext/cld3/{ext/src/script_span/getonescriptspan.h → getonescriptspan.h} +0 -0
  38. data/ext/cld3/getonescriptspan.o +0 -0
  39. data/ext/cld3/{ext/src/script_span/integral_types.h → integral_types.h} +0 -0
  40. data/ext/cld3/{ext/src/lang_id_nn_params.cc → lang_id_nn_params.cc} +0 -0
  41. data/ext/cld3/{ext/src/lang_id_nn_params.h → lang_id_nn_params.h} +0 -0
  42. data/ext/cld3/lang_id_nn_params.o +0 -0
  43. data/ext/cld3/{ext/src/language_identifier_features.cc → language_identifier_features.cc} +0 -0
  44. data/ext/cld3/{ext/src/language_identifier_features.h → language_identifier_features.h} +0 -0
  45. data/ext/cld3/language_identifier_features.o +0 -0
  46. data/ext/cld3/libcld3.so +0 -0
  47. data/ext/cld3/mkmf.log +36 -0
  48. data/ext/cld3/{ext/src/nnet_language_identifier.cc → nnet_language_identifier.cc} +0 -0
  49. data/ext/cld3/{ext/src/nnet_language_identifier.h → nnet_language_identifier.h} +0 -0
  50. data/ext/cld3/nnet_language_identifier.o +0 -0
  51. data/ext/cld3/nnet_language_identifier_c.o +0 -0
  52. data/ext/cld3/{ext/src/script_span/offsetmap.cc → offsetmap.cc} +0 -0
  53. data/ext/cld3/{ext/src/script_span/offsetmap.h → offsetmap.h} +0 -0
  54. data/ext/cld3/offsetmap.o +0 -0
  55. data/ext/cld3/{ext/src/script_span/port.h → port.h} +0 -0
  56. data/ext/cld3/{ext/src/registry.cc → registry.cc} +0 -0
  57. data/ext/cld3/{ext/src/registry.h → registry.h} +0 -0
  58. data/ext/cld3/registry.o +0 -0
  59. data/ext/cld3/{ext/src/relevant_script_feature.cc → relevant_script_feature.cc} +0 -0
  60. data/ext/cld3/{ext/src/relevant_script_feature.h → relevant_script_feature.h} +0 -0
  61. data/ext/cld3/relevant_script_feature.o +0 -0
  62. data/ext/cld3/{ext/src/script_detector.h → script_detector.h} +0 -0
  63. data/ext/cld3/sentence.pb.o +0 -0
  64. data/ext/cld3/{ext/src/sentence.proto → sentence.proto} +0 -0
  65. data/ext/cld3/{ext/src/sentence_features.cc → sentence_features.cc} +0 -0
  66. data/ext/cld3/{ext/src/sentence_features.h → sentence_features.h} +0 -0
  67. data/ext/cld3/sentence_features.o +0 -0
  68. data/ext/cld3/{ext/src/simple_adder.h → simple_adder.h} +0 -0
  69. data/ext/cld3/{ext/src/script_span/stringpiece.h → stringpiece.h} +0 -0
  70. data/ext/cld3/{ext/src/task_context.cc → task_context.cc} +0 -0
  71. data/ext/cld3/{ext/src/task_context.h → task_context.h} +0 -0
  72. data/ext/cld3/task_context.o +0 -0
  73. data/ext/cld3/{ext/src/task_context_params.cc → task_context_params.cc} +0 -0
  74. data/ext/cld3/{ext/src/task_context_params.h → task_context_params.h} +0 -0
  75. data/ext/cld3/task_context_params.o +0 -0
  76. data/ext/cld3/task_spec.pb.o +0 -0
  77. data/ext/cld3/{ext/src/task_spec.proto → task_spec.proto} +0 -0
  78. data/ext/cld3/{ext/src/script_span/text_processing.cc → text_processing.cc} +0 -0
  79. data/ext/cld3/{ext/src/script_span/text_processing.h → text_processing.h} +0 -0
  80. data/ext/cld3/text_processing.o +0 -0
  81. data/ext/cld3/{ext/src/unicodetext.cc → unicodetext.cc} +0 -0
  82. data/ext/cld3/{ext/src/unicodetext.h → unicodetext.h} +0 -0
  83. data/ext/cld3/unicodetext.o +0 -0
  84. data/ext/cld3/{ext/src/script_span/utf8acceptinterchange.h → utf8acceptinterchange.h} +0 -0
  85. data/ext/cld3/{ext/src/script_span/utf8prop_lettermarkscriptnum.h → utf8prop_lettermarkscriptnum.h} +0 -0
  86. data/ext/cld3/{ext/src/script_span/utf8repl_lettermarklower.h → utf8repl_lettermarklower.h} +0 -0
  87. data/ext/cld3/{ext/src/script_span/utf8scannot_lettermarkspecial.h → utf8scannot_lettermarkspecial.h} +0 -0
  88. data/ext/cld3/{ext/src/script_span/utf8statetable.cc → utf8statetable.cc} +0 -0
  89. data/ext/cld3/{ext/src/script_span/utf8statetable.h → utf8statetable.h} +0 -0
  90. data/ext/cld3/utf8statetable.o +0 -0
  91. data/ext/cld3/{ext/src/utils.cc → utils.cc} +0 -0
  92. data/ext/cld3/{ext/src/utils.h → utils.h} +0 -0
  93. data/ext/cld3/utils.o +0 -0
  94. data/ext/cld3/{ext/src/workspace.cc → workspace.cc} +0 -0
  95. data/ext/cld3/{ext/src/workspace.h → workspace.h} +0 -0
  96. data/ext/cld3/workspace.o +0 -0
  97. metadata +96 -81
  98. data/ext/cld3/ext/CMakeLists.txt +0 -69
  99. data/ext/cld3/ext/CONTRIBUTING.md +0 -26
  100. data/ext/cld3/ext/README.md +0 -73
  101. data/ext/cld3/ext/misc/myprotobuf.cmake +0 -58
  102. data/ext/cld3/ext/model.png +0 -0
  103. data/ext/cld3/ext/src/BUILD.gn +0 -133
  104. data/ext/cld3/ext/src/DEPS +0 -4
  105. data/ext/cld3/ext/src/language_identifier_features_test.cc +0 -261
  106. data/ext/cld3/ext/src/language_identifier_main.cc +0 -54
  107. data/ext/cld3/ext/src/nnet_lang_id_test.cc +0 -254
  108. data/ext/cld3/ext/src/nnet_lang_id_test_data.cc +0 -529
  109. data/ext/cld3/ext/src/nnet_lang_id_test_data.h +0 -117
  110. data/ext/cld3/ext/src/relevant_script_feature_test.cc +0 -259
  111. data/ext/cld3/ext/src/script_detector_test.cc +0 -161
  112. data/ext/cld3/ext/src/script_span/README.md +0 -11
  113. data/ext/cld3/ext/src/script_span/getonescriptspan_test.cc +0 -135
Binary file
Binary file
Binary file
Binary file
File without changes
File without changes
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cld3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.5
4
+ version: 3.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Akihiko Odaki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-03 00:00:00.000000000 Z
11
+ date: 2020-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -60,89 +60,104 @@ extra_rdoc_files: []
60
60
  files:
61
61
  - Gemfile
62
62
  - LICENSE
63
+ - LICENSE_CLD3
63
64
  - README.md
64
65
  - cld3.gemspec
65
- - ext/cld3/ext/CMakeLists.txt
66
- - ext/cld3/ext/CONTRIBUTING.md
67
- - ext/cld3/ext/LICENSE
68
- - ext/cld3/ext/README.md
69
- - ext/cld3/ext/misc/myprotobuf.cmake
70
- - ext/cld3/ext/model.png
71
- - ext/cld3/ext/src/BUILD.gn
72
- - ext/cld3/ext/src/DEPS
73
- - ext/cld3/ext/src/base.cc
74
- - ext/cld3/ext/src/base.h
75
- - ext/cld3/ext/src/casts.h
76
- - ext/cld3/ext/src/embedding_feature_extractor.cc
77
- - ext/cld3/ext/src/embedding_feature_extractor.h
78
- - ext/cld3/ext/src/embedding_network.cc
79
- - ext/cld3/ext/src/embedding_network.h
80
- - ext/cld3/ext/src/embedding_network_params.h
81
- - ext/cld3/ext/src/feature_extractor.cc
82
- - ext/cld3/ext/src/feature_extractor.h
83
- - ext/cld3/ext/src/feature_extractor.proto
84
- - ext/cld3/ext/src/feature_types.cc
85
- - ext/cld3/ext/src/feature_types.h
86
- - ext/cld3/ext/src/float16.h
87
- - ext/cld3/ext/src/fml_parser.cc
88
- - ext/cld3/ext/src/fml_parser.h
89
- - ext/cld3/ext/src/lang_id_nn_params.cc
90
- - ext/cld3/ext/src/lang_id_nn_params.h
91
- - ext/cld3/ext/src/language_identifier_features.cc
92
- - ext/cld3/ext/src/language_identifier_features.h
93
- - ext/cld3/ext/src/language_identifier_features_test.cc
94
- - ext/cld3/ext/src/language_identifier_main.cc
95
- - ext/cld3/ext/src/nnet_lang_id_test.cc
96
- - ext/cld3/ext/src/nnet_lang_id_test_data.cc
97
- - ext/cld3/ext/src/nnet_lang_id_test_data.h
98
- - ext/cld3/ext/src/nnet_language_identifier.cc
99
- - ext/cld3/ext/src/nnet_language_identifier.h
100
- - ext/cld3/ext/src/registry.cc
101
- - ext/cld3/ext/src/registry.h
102
- - ext/cld3/ext/src/relevant_script_feature.cc
103
- - ext/cld3/ext/src/relevant_script_feature.h
104
- - ext/cld3/ext/src/relevant_script_feature_test.cc
105
- - ext/cld3/ext/src/script_detector.h
106
- - ext/cld3/ext/src/script_detector_test.cc
107
- - ext/cld3/ext/src/script_span/README.md
108
- - ext/cld3/ext/src/script_span/fixunicodevalue.cc
109
- - ext/cld3/ext/src/script_span/fixunicodevalue.h
110
- - ext/cld3/ext/src/script_span/generated_entities.cc
111
- - ext/cld3/ext/src/script_span/generated_ulscript.cc
112
- - ext/cld3/ext/src/script_span/generated_ulscript.h
113
- - ext/cld3/ext/src/script_span/getonescriptspan.cc
114
- - ext/cld3/ext/src/script_span/getonescriptspan.h
115
- - ext/cld3/ext/src/script_span/getonescriptspan_test.cc
116
- - ext/cld3/ext/src/script_span/integral_types.h
117
- - ext/cld3/ext/src/script_span/offsetmap.cc
118
- - ext/cld3/ext/src/script_span/offsetmap.h
119
- - ext/cld3/ext/src/script_span/port.h
120
- - ext/cld3/ext/src/script_span/stringpiece.h
121
- - ext/cld3/ext/src/script_span/text_processing.cc
122
- - ext/cld3/ext/src/script_span/text_processing.h
123
- - ext/cld3/ext/src/script_span/utf8acceptinterchange.h
124
- - ext/cld3/ext/src/script_span/utf8prop_lettermarkscriptnum.h
125
- - ext/cld3/ext/src/script_span/utf8repl_lettermarklower.h
126
- - ext/cld3/ext/src/script_span/utf8scannot_lettermarkspecial.h
127
- - ext/cld3/ext/src/script_span/utf8statetable.cc
128
- - ext/cld3/ext/src/script_span/utf8statetable.h
129
- - ext/cld3/ext/src/sentence.proto
130
- - ext/cld3/ext/src/sentence_features.cc
131
- - ext/cld3/ext/src/sentence_features.h
132
- - ext/cld3/ext/src/simple_adder.h
133
- - ext/cld3/ext/src/task_context.cc
134
- - ext/cld3/ext/src/task_context.h
135
- - ext/cld3/ext/src/task_context_params.cc
136
- - ext/cld3/ext/src/task_context_params.h
137
- - ext/cld3/ext/src/task_spec.proto
138
- - ext/cld3/ext/src/unicodetext.cc
139
- - ext/cld3/ext/src/unicodetext.h
140
- - ext/cld3/ext/src/utils.cc
141
- - ext/cld3/ext/src/utils.h
142
- - ext/cld3/ext/src/workspace.cc
143
- - ext/cld3/ext/src/workspace.h
66
+ - ext/cld3/Makefile
67
+ - ext/cld3/base.cc
68
+ - ext/cld3/base.h
69
+ - ext/cld3/base.o
70
+ - ext/cld3/casts.h
71
+ - ext/cld3/embedding_feature_extractor.cc
72
+ - ext/cld3/embedding_feature_extractor.h
73
+ - ext/cld3/embedding_feature_extractor.o
74
+ - ext/cld3/embedding_network.cc
75
+ - ext/cld3/embedding_network.h
76
+ - ext/cld3/embedding_network.o
77
+ - ext/cld3/embedding_network_params.h
144
78
  - ext/cld3/extconf.rb
79
+ - ext/cld3/feature_extractor.cc
80
+ - ext/cld3/feature_extractor.h
81
+ - ext/cld3/feature_extractor.o
82
+ - ext/cld3/feature_extractor.pb.o
83
+ - ext/cld3/feature_extractor.proto
84
+ - ext/cld3/feature_types.cc
85
+ - ext/cld3/feature_types.h
86
+ - ext/cld3/feature_types.o
87
+ - ext/cld3/fixunicodevalue.cc
88
+ - ext/cld3/fixunicodevalue.h
89
+ - ext/cld3/fixunicodevalue.o
90
+ - ext/cld3/float16.h
91
+ - ext/cld3/fml_parser.cc
92
+ - ext/cld3/fml_parser.h
93
+ - ext/cld3/fml_parser.o
94
+ - ext/cld3/generated_entities.cc
95
+ - ext/cld3/generated_entities.o
96
+ - ext/cld3/generated_ulscript.cc
97
+ - ext/cld3/generated_ulscript.h
98
+ - ext/cld3/generated_ulscript.o
99
+ - ext/cld3/getonescriptspan.cc
100
+ - ext/cld3/getonescriptspan.h
101
+ - ext/cld3/getonescriptspan.o
102
+ - ext/cld3/integral_types.h
103
+ - ext/cld3/lang_id_nn_params.cc
104
+ - ext/cld3/lang_id_nn_params.h
105
+ - ext/cld3/lang_id_nn_params.o
106
+ - ext/cld3/language_identifier_features.cc
107
+ - ext/cld3/language_identifier_features.h
108
+ - ext/cld3/language_identifier_features.o
109
+ - ext/cld3/libcld3.so
110
+ - ext/cld3/mkmf.log
111
+ - ext/cld3/nnet_language_identifier.cc
112
+ - ext/cld3/nnet_language_identifier.h
113
+ - ext/cld3/nnet_language_identifier.o
145
114
  - ext/cld3/nnet_language_identifier_c.cc
115
+ - ext/cld3/nnet_language_identifier_c.o
116
+ - ext/cld3/offsetmap.cc
117
+ - ext/cld3/offsetmap.h
118
+ - ext/cld3/offsetmap.o
119
+ - ext/cld3/port.h
120
+ - ext/cld3/registry.cc
121
+ - ext/cld3/registry.h
122
+ - ext/cld3/registry.o
123
+ - ext/cld3/relevant_script_feature.cc
124
+ - ext/cld3/relevant_script_feature.h
125
+ - ext/cld3/relevant_script_feature.o
126
+ - ext/cld3/script_detector.h
127
+ - ext/cld3/sentence.pb.o
128
+ - ext/cld3/sentence.proto
129
+ - ext/cld3/sentence_features.cc
130
+ - ext/cld3/sentence_features.h
131
+ - ext/cld3/sentence_features.o
132
+ - ext/cld3/simple_adder.h
133
+ - ext/cld3/stringpiece.h
134
+ - ext/cld3/task_context.cc
135
+ - ext/cld3/task_context.h
136
+ - ext/cld3/task_context.o
137
+ - ext/cld3/task_context_params.cc
138
+ - ext/cld3/task_context_params.h
139
+ - ext/cld3/task_context_params.o
140
+ - ext/cld3/task_spec.pb.o
141
+ - ext/cld3/task_spec.proto
142
+ - ext/cld3/text_processing.cc
143
+ - ext/cld3/text_processing.h
144
+ - ext/cld3/text_processing.o
145
+ - ext/cld3/unicodetext.cc
146
+ - ext/cld3/unicodetext.h
147
+ - ext/cld3/unicodetext.o
148
+ - ext/cld3/utf8acceptinterchange.h
149
+ - ext/cld3/utf8prop_lettermarkscriptnum.h
150
+ - ext/cld3/utf8repl_lettermarklower.h
151
+ - ext/cld3/utf8scannot_lettermarkspecial.h
152
+ - ext/cld3/utf8statetable.cc
153
+ - ext/cld3/utf8statetable.h
154
+ - ext/cld3/utf8statetable.o
155
+ - ext/cld3/utils.cc
156
+ - ext/cld3/utils.h
157
+ - ext/cld3/utils.o
158
+ - ext/cld3/workspace.cc
159
+ - ext/cld3/workspace.h
160
+ - ext/cld3/workspace.o
146
161
  - lib/cld3.rb
147
162
  homepage: https://github.com/akihikodaki/cld3-ruby
148
163
  licenses:
@@ -1,69 +0,0 @@
1
- # This CMake script only builds a static cld3 lib and the unit tests.
2
-
3
- project(cld3)
4
-
5
- # Old versions of CMake don't search for or find protobuf-lite
6
- cmake_minimum_required(VERSION 3.9)
7
-
8
- find_package(Protobuf REQUIRED)
9
- message(STATUS "Protobuf_FOUND= ${Protobuf_FOUND}")
10
- message(STATUS "Protobuf_VERSION= ${Protobuf_VERSION}")
11
- message(WARNING "Protobuf 2.5 and CLD3 seems happy together. This script does NOT check if your verison of protobuf is compatible.")
12
- message(STATUS "Protobuf_LIBRARIES= ${Protobuf_LIBRARIES}")
13
- message(STATUS "Protobuf_LITE_LIBRARIES= ${Protobuf_LITE_LIBRARIES}") # Usually /usr/lib64/libprotobuf-lite.so
14
-
15
- # By default, protobuf_generate_cpp generates pb.* files directly in the CMake build dir.
16
- # But the CLD3 sources use hard-coded paths to cld_3/protos/*.pb.h.
17
- # So *.pb.h must be output to cld_3/protos.
18
- # For that, let's use a custom my_protobuf_generate_cpp:
19
- include(${CMAKE_CURRENT_SOURCE_DIR}/misc/myprotobuf.cmake)
20
- my_protobuf_generate_cpp(cld_3/protos PROTO_SRCS PROTO_HDRS src/feature_extractor.proto src/sentence.proto src/task_spec.proto)
21
- message(STATUS "PROTO_HDRS= ${PROTO_HDRS}")
22
-
23
- add_definitions(-fPIC) # Position Independent Code
24
- add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
25
- add_definitions(-std=c++11) # Needed for std::to_string(), ...
26
-
27
- include_directories(${CMAKE_CURRENT_BINARY_DIR}) # needed to include generated pb headers
28
-
29
- add_library(${PROJECT_NAME}
30
- ${PROTO_SRCS} ${PROTO_HDRS}
31
- src/base.cc
32
- src/embedding_feature_extractor.cc
33
- src/embedding_network.cc
34
- src/feature_extractor.cc
35
- src/feature_extractor.h
36
- src/feature_types.cc
37
- src/fml_parser.cc
38
- src/language_identifier_features.cc
39
- src/lang_id_nn_params.cc
40
- src/nnet_language_identifier.cc
41
- src/registry.cc
42
- src/relevant_script_feature.cc
43
- src/sentence_features.cc
44
- src/task_context.cc
45
- src/task_context_params.cc
46
- src/unicodetext.cc
47
- src/utils.cc
48
- src/workspace.cc
49
-
50
- src/script_span/generated_entities.cc
51
- src/script_span/getonescriptspan.cc
52
- src/script_span/getonescriptspan.h
53
- src/script_span/getonescriptspan_test.cc
54
- src/script_span/utf8statetable.cc
55
- src/script_span/offsetmap.cc
56
- src/script_span/text_processing.cc
57
- src/script_span/text_processing.h
58
- src/script_span/fixunicodevalue.cc
59
- )
60
-
61
- # unit tests exec:
62
- add_executable(language_identifier_main src/language_identifier_main.cc)
63
- target_link_libraries(language_identifier_main cld3 ${Protobuf_LITE_LIBRARIES})
64
-
65
- add_executable(getonescriptspan_test src/script_span/getonescriptspan_test.cc)
66
- target_link_libraries(getonescriptspan_test cld3 ${Protobuf_LITE_LIBRARIES})
67
-
68
- add_executable(language_identifier_features_test src/language_identifier_features_test.cc)
69
- target_link_libraries(language_identifier_features_test cld3 ${Protobuf_LITE_LIBRARIES})
@@ -1,26 +0,0 @@
1
- Want to contribute? Great! First, read this page (including the small print at
2
- the end).
3
-
4
- ### Before you contribute
5
- Before we can use your code, you must sign the
6
- [Google Individual Contributor License Agreement](https://cla.developers.google.com/about/google-individual)
7
- (CLA), which you can do online. The CLA is necessary mainly because you own the
8
- copyright to your changes, even after your contribution becomes part of our
9
- codebase, so we need your permission to use and distribute your code. We also
10
- need to be sure of various other things—for instance that you'll tell us if you
11
- know that your code infringes on other people's patents. You don't have to sign
12
- the CLA until after you've submitted your code for review and a member has
13
- approved it, but you must do it before we can put your code into our codebase.
14
- Before you start working on a larger contribution, you should get in touch with
15
- us first through the issue tracker with your idea so that we can help out and
16
- possibly guide you. Coordinating up front makes it much easier to avoid
17
- frustration later on.
18
-
19
- ### Code reviews
20
- All submissions, including submissions by project members, require review. We
21
- use Github pull requests for this purpose.
22
-
23
- ### The small print
24
- Contributions made by corporations are covered by a different agreement than
25
- the one above, the
26
- [Software Grant and Corporate Contributor License Agreement](https://cla.developers.google.com/about/google-corporate).
@@ -1,73 +0,0 @@
1
- # Compact Language Detector v3 (CLD3)
2
-
3
- * [Model](#model)
4
- * [Installation](#installation)
5
- * [Contact](#contact)
6
- * [Credits](#credits)
7
-
8
- ### Model
9
-
10
- CLD3 is a neural network model for language identification. This package
11
- contains the inference code and a trained model. The inference code
12
- extracts character ngrams from the input text and computes the fraction
13
- of times each of them appears. For example, as shown in the figure below,
14
- if the input text is "banana", then one of the extracted trigrams is "ana"
15
- and the corresponding fraction is 2/4. The ngrams are hashed down to an id
16
- within a small range, and each id is represented by a dense embedding vector
17
- estimated during training.
18
-
19
- The model averages the embeddings corresponding to each ngram type according
20
- to the fractions, and the averaged embeddings are concatenated to produce
21
- the embedding layer. The remaining components of the network are a hidden
22
- (Rectified linear) layer and a softmax layer.
23
-
24
- To get a language prediction for the input text, we simply perform a forward
25
- pass through the network.
26
-
27
- ![Figure](model.png "CLD3")
28
-
29
- ### Installation
30
- CLD3 is designed to run in the Chrome browser, so it relies on code in
31
- [Chromium](http://www.chromium.org/).
32
- The steps for building and running the demo of the language detection model are:
33
-
34
- - [check out](http://www.chromium.org/developers/how-tos/get-the-code) the
35
- Chromium repository.
36
- - copy the code to `//third_party/cld_3`
37
- - Uncomment `language_identifier_main` executable in `src/BUILD.gn`.
38
- - build and run the model using the commands:
39
-
40
- ```shell
41
- gn gen out/Default
42
- ninja -C out/Default third_party/cld_3/src/src:language_identifier_main
43
- out/Default/language_identifier_main
44
- ```
45
- ### Bugs and Feature Requests
46
-
47
- Open a [GitHub issue](https://github.com/google/cld3/issues) for this repository to file bugs and feature requests.
48
-
49
- ### Announcements and Discussion
50
-
51
- For announcements regarding major updates as well as general discussion list, please subscribe to:
52
- [cld3-users@googlegroups.com](https://groups.google.com/forum/#!forum/cld3-users)
53
-
54
- ### Credits
55
-
56
- Original authors of the code in this package include (in alphabetical order):
57
-
58
- * Alex Salcianu
59
- * Andy Golding
60
- * Anton Bakalov
61
- * Chris Alberti
62
- * Daniel Andor
63
- * David Weiss
64
- * Emily Pitler
65
- * Greg Coppola
66
- * Jason Riesa
67
- * Kuzman Ganchev
68
- * Michael Ringgaard
69
- * Nan Hua
70
- * Ryan McDonald
71
- * Slav Petrov
72
- * Stefan Istrate
73
- * Terry Koo
@@ -1,58 +0,0 @@
1
- # Special PROTOBUF_GENERATE_CPP variant that allows setting the output folder:
2
- # From https://stackoverflow.com/users/1600278/akira-okumura
3
-
4
- function(MY_PROTOBUF_GENERATE_CPP PATH SRCS HDRS)
5
- if(NOT ARGN)
6
- message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
7
- return()
8
- endif()
9
-
10
- if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
11
- # Create an include path for each file specified
12
- foreach(FIL ${ARGN})
13
- get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
14
- get_filename_component(ABS_PATH ${ABS_FIL} PATH)
15
- list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
16
- if(${_contains_already} EQUAL -1)
17
- list(APPEND _protobuf_include_path -I ${ABS_PATH})
18
- endif()
19
- endforeach()
20
- else()
21
- set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
22
- endif()
23
-
24
- if(DEFINED PROTOBUF_IMPORT_DIRS)
25
- foreach(DIR ${PROTOBUF_IMPORT_DIRS})
26
- get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
27
- list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
28
- if(${_contains_already} EQUAL -1)
29
- list(APPEND _protobuf_include_path -I ${ABS_PATH})
30
- endif()
31
- endforeach()
32
- endif()
33
-
34
- set(${SRCS})
35
- set(${HDRS})
36
- foreach(FIL ${ARGN})
37
- get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
38
- get_filename_component(FIL_WE ${FIL} NAME_WE)
39
-
40
- list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.cc")
41
- list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.h")
42
-
43
- execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/${PATH})
44
-
45
- add_custom_command(
46
- OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.cc"
47
- "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.h"
48
- COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
49
- ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR}/${PATH} ${_protobuf_include_path} ${ABS_FIL}
50
- DEPENDS ${ABS_FIL}
51
- COMMENT "Running C++ protocol buffer compiler on ${FIL}"
52
- VERBATIM )
53
- endforeach()
54
-
55
- set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
56
- set(${SRCS} ${${SRCS}} PARENT_SCOPE)
57
- set(${HDRS} ${${HDRS}} PARENT_SCOPE)
58
- endfunction()