cld3 3.2.5 → 3.2.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/{ext/cld3/ext/LICENSE → LICENSE_CLD3} +0 -0
  3. data/cld3.gemspec +1 -1
  4. data/ext/cld3/Makefile +266 -0
  5. data/ext/cld3/{ext/src/base.cc → base.cc} +0 -0
  6. data/ext/cld3/{ext/src/base.h → base.h} +0 -0
  7. data/ext/cld3/base.o +0 -0
  8. data/ext/cld3/{ext/src/casts.h → casts.h} +0 -0
  9. data/ext/cld3/{ext/src/embedding_feature_extractor.cc → embedding_feature_extractor.cc} +0 -0
  10. data/ext/cld3/{ext/src/embedding_feature_extractor.h → embedding_feature_extractor.h} +0 -0
  11. data/ext/cld3/embedding_feature_extractor.o +0 -0
  12. data/ext/cld3/{ext/src/embedding_network.cc → embedding_network.cc} +0 -0
  13. data/ext/cld3/{ext/src/embedding_network.h → embedding_network.h} +0 -0
  14. data/ext/cld3/embedding_network.o +0 -0
  15. data/ext/cld3/{ext/src/embedding_network_params.h → embedding_network_params.h} +0 -0
  16. data/ext/cld3/{ext/src/feature_extractor.cc → feature_extractor.cc} +0 -0
  17. data/ext/cld3/{ext/src/feature_extractor.h → feature_extractor.h} +0 -0
  18. data/ext/cld3/feature_extractor.o +0 -0
  19. data/ext/cld3/feature_extractor.pb.o +0 -0
  20. data/ext/cld3/{ext/src/feature_extractor.proto → feature_extractor.proto} +0 -0
  21. data/ext/cld3/{ext/src/feature_types.cc → feature_types.cc} +0 -0
  22. data/ext/cld3/{ext/src/feature_types.h → feature_types.h} +0 -0
  23. data/ext/cld3/feature_types.o +0 -0
  24. data/ext/cld3/{ext/src/script_span/fixunicodevalue.cc → fixunicodevalue.cc} +0 -0
  25. data/ext/cld3/{ext/src/script_span/fixunicodevalue.h → fixunicodevalue.h} +0 -0
  26. data/ext/cld3/fixunicodevalue.o +0 -0
  27. data/ext/cld3/{ext/src/float16.h → float16.h} +0 -0
  28. data/ext/cld3/{ext/src/fml_parser.cc → fml_parser.cc} +0 -0
  29. data/ext/cld3/{ext/src/fml_parser.h → fml_parser.h} +0 -0
  30. data/ext/cld3/fml_parser.o +0 -0
  31. data/ext/cld3/{ext/src/script_span/generated_entities.cc → generated_entities.cc} +0 -0
  32. data/ext/cld3/generated_entities.o +0 -0
  33. data/ext/cld3/{ext/src/script_span/generated_ulscript.cc → generated_ulscript.cc} +0 -0
  34. data/ext/cld3/{ext/src/script_span/generated_ulscript.h → generated_ulscript.h} +0 -0
  35. data/ext/cld3/generated_ulscript.o +0 -0
  36. data/ext/cld3/{ext/src/script_span/getonescriptspan.cc → getonescriptspan.cc} +0 -0
  37. data/ext/cld3/{ext/src/script_span/getonescriptspan.h → getonescriptspan.h} +0 -0
  38. data/ext/cld3/getonescriptspan.o +0 -0
  39. data/ext/cld3/{ext/src/script_span/integral_types.h → integral_types.h} +0 -0
  40. data/ext/cld3/{ext/src/lang_id_nn_params.cc → lang_id_nn_params.cc} +0 -0
  41. data/ext/cld3/{ext/src/lang_id_nn_params.h → lang_id_nn_params.h} +0 -0
  42. data/ext/cld3/lang_id_nn_params.o +0 -0
  43. data/ext/cld3/{ext/src/language_identifier_features.cc → language_identifier_features.cc} +0 -0
  44. data/ext/cld3/{ext/src/language_identifier_features.h → language_identifier_features.h} +0 -0
  45. data/ext/cld3/language_identifier_features.o +0 -0
  46. data/ext/cld3/libcld3.so +0 -0
  47. data/ext/cld3/mkmf.log +36 -0
  48. data/ext/cld3/{ext/src/nnet_language_identifier.cc → nnet_language_identifier.cc} +0 -0
  49. data/ext/cld3/{ext/src/nnet_language_identifier.h → nnet_language_identifier.h} +0 -0
  50. data/ext/cld3/nnet_language_identifier.o +0 -0
  51. data/ext/cld3/nnet_language_identifier_c.o +0 -0
  52. data/ext/cld3/{ext/src/script_span/offsetmap.cc → offsetmap.cc} +0 -0
  53. data/ext/cld3/{ext/src/script_span/offsetmap.h → offsetmap.h} +0 -0
  54. data/ext/cld3/offsetmap.o +0 -0
  55. data/ext/cld3/{ext/src/script_span/port.h → port.h} +0 -0
  56. data/ext/cld3/{ext/src/registry.cc → registry.cc} +0 -0
  57. data/ext/cld3/{ext/src/registry.h → registry.h} +0 -0
  58. data/ext/cld3/registry.o +0 -0
  59. data/ext/cld3/{ext/src/relevant_script_feature.cc → relevant_script_feature.cc} +0 -0
  60. data/ext/cld3/{ext/src/relevant_script_feature.h → relevant_script_feature.h} +0 -0
  61. data/ext/cld3/relevant_script_feature.o +0 -0
  62. data/ext/cld3/{ext/src/script_detector.h → script_detector.h} +0 -0
  63. data/ext/cld3/sentence.pb.o +0 -0
  64. data/ext/cld3/{ext/src/sentence.proto → sentence.proto} +0 -0
  65. data/ext/cld3/{ext/src/sentence_features.cc → sentence_features.cc} +0 -0
  66. data/ext/cld3/{ext/src/sentence_features.h → sentence_features.h} +0 -0
  67. data/ext/cld3/sentence_features.o +0 -0
  68. data/ext/cld3/{ext/src/simple_adder.h → simple_adder.h} +0 -0
  69. data/ext/cld3/{ext/src/script_span/stringpiece.h → stringpiece.h} +0 -0
  70. data/ext/cld3/{ext/src/task_context.cc → task_context.cc} +0 -0
  71. data/ext/cld3/{ext/src/task_context.h → task_context.h} +0 -0
  72. data/ext/cld3/task_context.o +0 -0
  73. data/ext/cld3/{ext/src/task_context_params.cc → task_context_params.cc} +0 -0
  74. data/ext/cld3/{ext/src/task_context_params.h → task_context_params.h} +0 -0
  75. data/ext/cld3/task_context_params.o +0 -0
  76. data/ext/cld3/task_spec.pb.o +0 -0
  77. data/ext/cld3/{ext/src/task_spec.proto → task_spec.proto} +0 -0
  78. data/ext/cld3/{ext/src/script_span/text_processing.cc → text_processing.cc} +0 -0
  79. data/ext/cld3/{ext/src/script_span/text_processing.h → text_processing.h} +0 -0
  80. data/ext/cld3/text_processing.o +0 -0
  81. data/ext/cld3/{ext/src/unicodetext.cc → unicodetext.cc} +0 -0
  82. data/ext/cld3/{ext/src/unicodetext.h → unicodetext.h} +0 -0
  83. data/ext/cld3/unicodetext.o +0 -0
  84. data/ext/cld3/{ext/src/script_span/utf8acceptinterchange.h → utf8acceptinterchange.h} +0 -0
  85. data/ext/cld3/{ext/src/script_span/utf8prop_lettermarkscriptnum.h → utf8prop_lettermarkscriptnum.h} +0 -0
  86. data/ext/cld3/{ext/src/script_span/utf8repl_lettermarklower.h → utf8repl_lettermarklower.h} +0 -0
  87. data/ext/cld3/{ext/src/script_span/utf8scannot_lettermarkspecial.h → utf8scannot_lettermarkspecial.h} +0 -0
  88. data/ext/cld3/{ext/src/script_span/utf8statetable.cc → utf8statetable.cc} +0 -0
  89. data/ext/cld3/{ext/src/script_span/utf8statetable.h → utf8statetable.h} +0 -0
  90. data/ext/cld3/utf8statetable.o +0 -0
  91. data/ext/cld3/{ext/src/utils.cc → utils.cc} +0 -0
  92. data/ext/cld3/{ext/src/utils.h → utils.h} +0 -0
  93. data/ext/cld3/utils.o +0 -0
  94. data/ext/cld3/{ext/src/workspace.cc → workspace.cc} +0 -0
  95. data/ext/cld3/{ext/src/workspace.h → workspace.h} +0 -0
  96. data/ext/cld3/workspace.o +0 -0
  97. metadata +96 -81
  98. data/ext/cld3/ext/CMakeLists.txt +0 -69
  99. data/ext/cld3/ext/CONTRIBUTING.md +0 -26
  100. data/ext/cld3/ext/README.md +0 -73
  101. data/ext/cld3/ext/misc/myprotobuf.cmake +0 -58
  102. data/ext/cld3/ext/model.png +0 -0
  103. data/ext/cld3/ext/src/BUILD.gn +0 -133
  104. data/ext/cld3/ext/src/DEPS +0 -4
  105. data/ext/cld3/ext/src/language_identifier_features_test.cc +0 -261
  106. data/ext/cld3/ext/src/language_identifier_main.cc +0 -54
  107. data/ext/cld3/ext/src/nnet_lang_id_test.cc +0 -254
  108. data/ext/cld3/ext/src/nnet_lang_id_test_data.cc +0 -529
  109. data/ext/cld3/ext/src/nnet_lang_id_test_data.h +0 -117
  110. data/ext/cld3/ext/src/relevant_script_feature_test.cc +0 -259
  111. data/ext/cld3/ext/src/script_detector_test.cc +0 -161
  112. data/ext/cld3/ext/src/script_span/README.md +0 -11
  113. data/ext/cld3/ext/src/script_span/getonescriptspan_test.cc +0 -135
Binary file
Binary file
Binary file
Binary file
File without changes
File without changes
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cld3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.5
4
+ version: 3.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Akihiko Odaki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-03 00:00:00.000000000 Z
11
+ date: 2020-01-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -60,89 +60,104 @@ extra_rdoc_files: []
60
60
  files:
61
61
  - Gemfile
62
62
  - LICENSE
63
+ - LICENSE_CLD3
63
64
  - README.md
64
65
  - cld3.gemspec
65
- - ext/cld3/ext/CMakeLists.txt
66
- - ext/cld3/ext/CONTRIBUTING.md
67
- - ext/cld3/ext/LICENSE
68
- - ext/cld3/ext/README.md
69
- - ext/cld3/ext/misc/myprotobuf.cmake
70
- - ext/cld3/ext/model.png
71
- - ext/cld3/ext/src/BUILD.gn
72
- - ext/cld3/ext/src/DEPS
73
- - ext/cld3/ext/src/base.cc
74
- - ext/cld3/ext/src/base.h
75
- - ext/cld3/ext/src/casts.h
76
- - ext/cld3/ext/src/embedding_feature_extractor.cc
77
- - ext/cld3/ext/src/embedding_feature_extractor.h
78
- - ext/cld3/ext/src/embedding_network.cc
79
- - ext/cld3/ext/src/embedding_network.h
80
- - ext/cld3/ext/src/embedding_network_params.h
81
- - ext/cld3/ext/src/feature_extractor.cc
82
- - ext/cld3/ext/src/feature_extractor.h
83
- - ext/cld3/ext/src/feature_extractor.proto
84
- - ext/cld3/ext/src/feature_types.cc
85
- - ext/cld3/ext/src/feature_types.h
86
- - ext/cld3/ext/src/float16.h
87
- - ext/cld3/ext/src/fml_parser.cc
88
- - ext/cld3/ext/src/fml_parser.h
89
- - ext/cld3/ext/src/lang_id_nn_params.cc
90
- - ext/cld3/ext/src/lang_id_nn_params.h
91
- - ext/cld3/ext/src/language_identifier_features.cc
92
- - ext/cld3/ext/src/language_identifier_features.h
93
- - ext/cld3/ext/src/language_identifier_features_test.cc
94
- - ext/cld3/ext/src/language_identifier_main.cc
95
- - ext/cld3/ext/src/nnet_lang_id_test.cc
96
- - ext/cld3/ext/src/nnet_lang_id_test_data.cc
97
- - ext/cld3/ext/src/nnet_lang_id_test_data.h
98
- - ext/cld3/ext/src/nnet_language_identifier.cc
99
- - ext/cld3/ext/src/nnet_language_identifier.h
100
- - ext/cld3/ext/src/registry.cc
101
- - ext/cld3/ext/src/registry.h
102
- - ext/cld3/ext/src/relevant_script_feature.cc
103
- - ext/cld3/ext/src/relevant_script_feature.h
104
- - ext/cld3/ext/src/relevant_script_feature_test.cc
105
- - ext/cld3/ext/src/script_detector.h
106
- - ext/cld3/ext/src/script_detector_test.cc
107
- - ext/cld3/ext/src/script_span/README.md
108
- - ext/cld3/ext/src/script_span/fixunicodevalue.cc
109
- - ext/cld3/ext/src/script_span/fixunicodevalue.h
110
- - ext/cld3/ext/src/script_span/generated_entities.cc
111
- - ext/cld3/ext/src/script_span/generated_ulscript.cc
112
- - ext/cld3/ext/src/script_span/generated_ulscript.h
113
- - ext/cld3/ext/src/script_span/getonescriptspan.cc
114
- - ext/cld3/ext/src/script_span/getonescriptspan.h
115
- - ext/cld3/ext/src/script_span/getonescriptspan_test.cc
116
- - ext/cld3/ext/src/script_span/integral_types.h
117
- - ext/cld3/ext/src/script_span/offsetmap.cc
118
- - ext/cld3/ext/src/script_span/offsetmap.h
119
- - ext/cld3/ext/src/script_span/port.h
120
- - ext/cld3/ext/src/script_span/stringpiece.h
121
- - ext/cld3/ext/src/script_span/text_processing.cc
122
- - ext/cld3/ext/src/script_span/text_processing.h
123
- - ext/cld3/ext/src/script_span/utf8acceptinterchange.h
124
- - ext/cld3/ext/src/script_span/utf8prop_lettermarkscriptnum.h
125
- - ext/cld3/ext/src/script_span/utf8repl_lettermarklower.h
126
- - ext/cld3/ext/src/script_span/utf8scannot_lettermarkspecial.h
127
- - ext/cld3/ext/src/script_span/utf8statetable.cc
128
- - ext/cld3/ext/src/script_span/utf8statetable.h
129
- - ext/cld3/ext/src/sentence.proto
130
- - ext/cld3/ext/src/sentence_features.cc
131
- - ext/cld3/ext/src/sentence_features.h
132
- - ext/cld3/ext/src/simple_adder.h
133
- - ext/cld3/ext/src/task_context.cc
134
- - ext/cld3/ext/src/task_context.h
135
- - ext/cld3/ext/src/task_context_params.cc
136
- - ext/cld3/ext/src/task_context_params.h
137
- - ext/cld3/ext/src/task_spec.proto
138
- - ext/cld3/ext/src/unicodetext.cc
139
- - ext/cld3/ext/src/unicodetext.h
140
- - ext/cld3/ext/src/utils.cc
141
- - ext/cld3/ext/src/utils.h
142
- - ext/cld3/ext/src/workspace.cc
143
- - ext/cld3/ext/src/workspace.h
66
+ - ext/cld3/Makefile
67
+ - ext/cld3/base.cc
68
+ - ext/cld3/base.h
69
+ - ext/cld3/base.o
70
+ - ext/cld3/casts.h
71
+ - ext/cld3/embedding_feature_extractor.cc
72
+ - ext/cld3/embedding_feature_extractor.h
73
+ - ext/cld3/embedding_feature_extractor.o
74
+ - ext/cld3/embedding_network.cc
75
+ - ext/cld3/embedding_network.h
76
+ - ext/cld3/embedding_network.o
77
+ - ext/cld3/embedding_network_params.h
144
78
  - ext/cld3/extconf.rb
79
+ - ext/cld3/feature_extractor.cc
80
+ - ext/cld3/feature_extractor.h
81
+ - ext/cld3/feature_extractor.o
82
+ - ext/cld3/feature_extractor.pb.o
83
+ - ext/cld3/feature_extractor.proto
84
+ - ext/cld3/feature_types.cc
85
+ - ext/cld3/feature_types.h
86
+ - ext/cld3/feature_types.o
87
+ - ext/cld3/fixunicodevalue.cc
88
+ - ext/cld3/fixunicodevalue.h
89
+ - ext/cld3/fixunicodevalue.o
90
+ - ext/cld3/float16.h
91
+ - ext/cld3/fml_parser.cc
92
+ - ext/cld3/fml_parser.h
93
+ - ext/cld3/fml_parser.o
94
+ - ext/cld3/generated_entities.cc
95
+ - ext/cld3/generated_entities.o
96
+ - ext/cld3/generated_ulscript.cc
97
+ - ext/cld3/generated_ulscript.h
98
+ - ext/cld3/generated_ulscript.o
99
+ - ext/cld3/getonescriptspan.cc
100
+ - ext/cld3/getonescriptspan.h
101
+ - ext/cld3/getonescriptspan.o
102
+ - ext/cld3/integral_types.h
103
+ - ext/cld3/lang_id_nn_params.cc
104
+ - ext/cld3/lang_id_nn_params.h
105
+ - ext/cld3/lang_id_nn_params.o
106
+ - ext/cld3/language_identifier_features.cc
107
+ - ext/cld3/language_identifier_features.h
108
+ - ext/cld3/language_identifier_features.o
109
+ - ext/cld3/libcld3.so
110
+ - ext/cld3/mkmf.log
111
+ - ext/cld3/nnet_language_identifier.cc
112
+ - ext/cld3/nnet_language_identifier.h
113
+ - ext/cld3/nnet_language_identifier.o
145
114
  - ext/cld3/nnet_language_identifier_c.cc
115
+ - ext/cld3/nnet_language_identifier_c.o
116
+ - ext/cld3/offsetmap.cc
117
+ - ext/cld3/offsetmap.h
118
+ - ext/cld3/offsetmap.o
119
+ - ext/cld3/port.h
120
+ - ext/cld3/registry.cc
121
+ - ext/cld3/registry.h
122
+ - ext/cld3/registry.o
123
+ - ext/cld3/relevant_script_feature.cc
124
+ - ext/cld3/relevant_script_feature.h
125
+ - ext/cld3/relevant_script_feature.o
126
+ - ext/cld3/script_detector.h
127
+ - ext/cld3/sentence.pb.o
128
+ - ext/cld3/sentence.proto
129
+ - ext/cld3/sentence_features.cc
130
+ - ext/cld3/sentence_features.h
131
+ - ext/cld3/sentence_features.o
132
+ - ext/cld3/simple_adder.h
133
+ - ext/cld3/stringpiece.h
134
+ - ext/cld3/task_context.cc
135
+ - ext/cld3/task_context.h
136
+ - ext/cld3/task_context.o
137
+ - ext/cld3/task_context_params.cc
138
+ - ext/cld3/task_context_params.h
139
+ - ext/cld3/task_context_params.o
140
+ - ext/cld3/task_spec.pb.o
141
+ - ext/cld3/task_spec.proto
142
+ - ext/cld3/text_processing.cc
143
+ - ext/cld3/text_processing.h
144
+ - ext/cld3/text_processing.o
145
+ - ext/cld3/unicodetext.cc
146
+ - ext/cld3/unicodetext.h
147
+ - ext/cld3/unicodetext.o
148
+ - ext/cld3/utf8acceptinterchange.h
149
+ - ext/cld3/utf8prop_lettermarkscriptnum.h
150
+ - ext/cld3/utf8repl_lettermarklower.h
151
+ - ext/cld3/utf8scannot_lettermarkspecial.h
152
+ - ext/cld3/utf8statetable.cc
153
+ - ext/cld3/utf8statetable.h
154
+ - ext/cld3/utf8statetable.o
155
+ - ext/cld3/utils.cc
156
+ - ext/cld3/utils.h
157
+ - ext/cld3/utils.o
158
+ - ext/cld3/workspace.cc
159
+ - ext/cld3/workspace.h
160
+ - ext/cld3/workspace.o
146
161
  - lib/cld3.rb
147
162
  homepage: https://github.com/akihikodaki/cld3-ruby
148
163
  licenses:
@@ -1,69 +0,0 @@
1
- # This CMake script only builds a static cld3 library and the unit tests.
2
-
3
- project(cld3)
4
-
5
- # Old versions of CMake don't search/find protobuf lite
6
- cmake_minimum_required(VERSION 3.9)
7
-
8
- find_package(Protobuf REQUIRED)
9
- message(STATUS "Protobuf_FOUND= ${Protobuf_FOUND}")
10
- message(STATUS "Protobuf_VERSION= ${Protobuf_VERSION}")
11
- message(WARNING "Protobuf 2.5 and CLD3 seems happy together. This script does NOT check if your verison of protobuf is compatible.")
12
- message(STATUS "Protobuf_LIBRARIES= ${Protobuf_LIBRARIES}")
13
- message(STATUS "Protobuf_LITE_LIBRARIES= ${Protobuf_LITE_LIBRARIES}") # Usually /usr/lib64/libprotobuf-lite.so
14
-
15
- # By default, protobuf_generate_cpp generates pb.* files directly in the cmake build dir.
16
- # But CLD3 sources have been coded using hard-coded paths to cld_3/protos/*.pb.h.
17
- # So *.pb.h must be output to cld_3/protos.
18
- # For that, let's use a custom my_protobuf_generate_cpp:
19
- include(${CMAKE_CURRENT_SOURCE_DIR}/misc/myprotobuf.cmake)
20
- my_protobuf_generate_cpp(cld_3/protos PROTO_SRCS PROTO_HDRS src/feature_extractor.proto src/sentence.proto src/task_spec.proto)
21
- message(STATUS "PROTO_HDRS= ${PROTO_HDRS}")
22
-
23
- add_definitions(-fPIC) # Position Independent Code
24
- add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
25
- add_definitions(-std=c++11) # Needed for std::to_string(), ...
26
-
27
- include_directories(${CMAKE_CURRENT_BINARY_DIR}) # needed to include generated pb headers
28
-
29
- add_library(${PROJECT_NAME}
30
- ${PROTO_SRCS} ${PROTO_HDRS}
31
- src/base.cc
32
- src/embedding_feature_extractor.cc
33
- src/embedding_network.cc
34
- src/feature_extractor.cc
35
- src/feature_extractor.h
36
- src/feature_types.cc
37
- src/fml_parser.cc
38
- src/language_identifier_features.cc
39
- src/lang_id_nn_params.cc
40
- src/nnet_language_identifier.cc
41
- src/registry.cc
42
- src/relevant_script_feature.cc
43
- src/sentence_features.cc
44
- src/task_context.cc
45
- src/task_context_params.cc
46
- src/unicodetext.cc
47
- src/utils.cc
48
- src/workspace.cc
49
-
50
- src/script_span/generated_entities.cc
51
- src/script_span/getonescriptspan.cc
52
- src/script_span/getonescriptspan.h
53
- src/script_span/getonescriptspan_test.cc
54
- src/script_span/utf8statetable.cc
55
- src/script_span/offsetmap.cc
56
- src/script_span/text_processing.cc
57
- src/script_span/text_processing.h
58
- src/script_span/fixunicodevalue.cc
59
- )
60
-
61
- # unit tests exec:
62
- add_executable(language_identifier_main src/language_identifier_main.cc)
63
- target_link_libraries(language_identifier_main cld3 ${Protobuf_LITE_LIBRARIES})
64
-
65
- add_executable(getonescriptspan_test src/script_span/getonescriptspan_test.cc)
66
- target_link_libraries(getonescriptspan_test cld3 ${Protobuf_LITE_LIBRARIES})
67
-
68
- add_executable(language_identifier_features_test src/language_identifier_features_test.cc)
69
- target_link_libraries(language_identifier_features_test cld3 ${Protobuf_LITE_LIBRARIES})
@@ -1,26 +0,0 @@
1
- Want to contribute? Great! First, read this page (including the small print at
2
- the end).
3
-
4
- ### Before you contribute
5
- Before we can use your code, you must sign the
6
- [Google Individual Contributor License Agreement](https://cla.developers.google.com/about/google-individual)
7
- (CLA), which you can do online. The CLA is necessary mainly because you own the
8
- copyright to your changes, even after your contribution becomes part of our
9
- codebase, so we need your permission to use and distribute your code. We also
10
- need to be sure of various other things—for instance that you'll tell us if you
11
- know that your code infringes on other people's patents. You don't have to sign
12
- the CLA until after you've submitted your code for review and a member has
13
- approved it, but you must do it before we can put your code into our codebase.
14
- Before you start working on a larger contribution, you should get in touch with
15
- us first through the issue tracker with your idea so that we can help out and
16
- possibly guide you. Coordinating up front makes it much easier to avoid
17
- frustration later on.
18
-
19
- ### Code reviews
20
- All submissions, including submissions by project members, require review. We
21
- use Github pull requests for this purpose.
22
-
23
- ### The small print
24
- Contributions made by corporations are covered by a different agreement than
25
- the one above, the
26
- [Software Grant and Corporate Contributor License Agreement](https://cla.developers.google.com/about/google-corporate).
@@ -1,73 +0,0 @@
1
- # Compact Language Detector v3 (CLD3)
2
-
3
- * [Model](#model)
4
- * [Installation](#installation)
5
- * [Contact](#contact)
6
- * [Credits](#credits)
7
-
8
- ### Model
9
-
10
- CLD3 is a neural network model for language identification. This package
11
- contains the inference code and a trained model. The inference code
12
- extracts character ngrams from the input text and computes the fraction
13
- of times each of them appears. For example, as shown in the figure below,
14
- if the input text is "banana", then one of the extracted trigrams is "ana"
15
- and the corresponding fraction is 2/4. The ngrams are hashed down to an id
16
- within a small range, and each id is represented by a dense embedding vector
17
- estimated during training.
18
-
19
- The model averages the embeddings corresponding to each ngram type according
20
- to the fractions, and the averaged embeddings are concatenated to produce
21
- the embedding layer. The remaining components of the network are a hidden
22
- (Rectified linear) layer and a softmax layer.
23
-
24
- To get a language prediction for the input text, we simply perform a forward
25
- pass through the network.
26
-
27
- ![Figure](model.png "CLD3")
28
-
29
- ### Installation
30
- CLD3 is designed to run in the Chrome browser, so it relies on code in
31
- [Chromium](http://www.chromium.org/).
32
- The steps for building and running the demo of the language detection model are:
33
-
34
- - [check out](http://www.chromium.org/developers/how-tos/get-the-code) the
35
- Chromium repository.
36
- - copy the code to `//third_party/cld_3`
37
- - Uncomment `language_identifier_main` executable in `src/BUILD.gn`.
38
- - build and run the model using the commands:
39
-
40
- ```shell
41
- gn gen out/Default
42
- ninja -C out/Default third_party/cld_3/src/src:language_identifier_main
43
- out/Default/language_identifier_main
44
- ```
45
- ### Bugs and Feature Requests
46
-
47
- Open a [GitHub issue](https://github.com/google/cld3/issues) for this repository to file bugs and feature requests.
48
-
49
- ### Announcements and Discussion
50
-
51
- For announcements regarding major updates, as well as general discussion, please subscribe to the mailing list:
52
- [cld3-users@googlegroups.com](https://groups.google.com/forum/#!forum/cld3-users)
53
-
54
- ### Credits
55
-
56
- Original authors of the code in this package include (in alphabetical order):
57
-
58
- * Alex Salcianu
59
- * Andy Golding
60
- * Anton Bakalov
61
- * Chris Alberti
62
- * Daniel Andor
63
- * David Weiss
64
- * Emily Pitler
65
- * Greg Coppola
66
- * Jason Riesa
67
- * Kuzman Ganchev
68
- * Michael Ringgaard
69
- * Nan Hua
70
- * Ryan McDonald
71
- * Slav Petrov
72
- * Stefan Istrate
73
- * Terry Koo
@@ -1,58 +0,0 @@
1
- # Special PROTOBUF_GENERATE_CPP which allows setting the output folder:
2
- # From https://stackoverflow.com/users/1600278/akira-okumura
3
-
4
- function(MY_PROTOBUF_GENERATE_CPP PATH SRCS HDRS)
5
- if(NOT ARGN)
6
- message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
7
- return()
8
- endif()
9
-
10
- if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
11
- # Create an include path for each file specified
12
- foreach(FIL ${ARGN})
13
- get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
14
- get_filename_component(ABS_PATH ${ABS_FIL} PATH)
15
- list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
16
- if(${_contains_already} EQUAL -1)
17
- list(APPEND _protobuf_include_path -I ${ABS_PATH})
18
- endif()
19
- endforeach()
20
- else()
21
- set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
22
- endif()
23
-
24
- if(DEFINED PROTOBUF_IMPORT_DIRS)
25
- foreach(DIR ${PROTOBUF_IMPORT_DIRS})
26
- get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
27
- list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
28
- if(${_contains_already} EQUAL -1)
29
- list(APPEND _protobuf_include_path -I ${ABS_PATH})
30
- endif()
31
- endforeach()
32
- endif()
33
-
34
- set(${SRCS})
35
- set(${HDRS})
36
- foreach(FIL ${ARGN})
37
- get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
38
- get_filename_component(FIL_WE ${FIL} NAME_WE)
39
-
40
- list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.cc")
41
- list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.h")
42
-
43
- execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/${PATH})
44
-
45
- add_custom_command(
46
- OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.cc"
47
- "${CMAKE_CURRENT_BINARY_DIR}/${PATH}/${FIL_WE}.pb.h"
48
- COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
49
- ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR}/${PATH} ${_protobuf_include_path} ${ABS_FIL}
50
- DEPENDS ${ABS_FIL}
51
- COMMENT "Running C++ protocol buffer compiler on ${FIL}"
52
- VERBATIM )
53
- endforeach()
54
-
55
- set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
56
- set(${SRCS} ${${SRCS}} PARENT_SCOPE)
57
- set(${HDRS} ${${HDRS}} PARENT_SCOPE)
58
- endfunction()