cld3 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +18 -0
  3. data/LICENSE +204 -0
  4. data/LICENSE_CLD3 +203 -0
  5. data/README.md +22 -0
  6. data/cld3.gemspec +35 -0
  7. data/ext/cld3/base.cc +36 -0
  8. data/ext/cld3/base.h +106 -0
  9. data/ext/cld3/casts.h +98 -0
  10. data/ext/cld3/embedding_feature_extractor.cc +51 -0
  11. data/ext/cld3/embedding_feature_extractor.h +182 -0
  12. data/ext/cld3/embedding_network.cc +196 -0
  13. data/ext/cld3/embedding_network.h +186 -0
  14. data/ext/cld3/embedding_network_params.h +285 -0
  15. data/ext/cld3/extconf.rb +49 -0
  16. data/ext/cld3/feature_extractor.cc +137 -0
  17. data/ext/cld3/feature_extractor.h +633 -0
  18. data/ext/cld3/feature_extractor.proto +50 -0
  19. data/ext/cld3/feature_types.cc +72 -0
  20. data/ext/cld3/feature_types.h +158 -0
  21. data/ext/cld3/fixunicodevalue.cc +55 -0
  22. data/ext/cld3/fixunicodevalue.h +69 -0
  23. data/ext/cld3/float16.h +58 -0
  24. data/ext/cld3/fml_parser.cc +308 -0
  25. data/ext/cld3/fml_parser.h +123 -0
  26. data/ext/cld3/generated_entities.cc +296 -0
  27. data/ext/cld3/generated_ulscript.cc +678 -0
  28. data/ext/cld3/generated_ulscript.h +142 -0
  29. data/ext/cld3/getonescriptspan.cc +1109 -0
  30. data/ext/cld3/getonescriptspan.h +124 -0
  31. data/ext/cld3/integral_types.h +37 -0
  32. data/ext/cld3/lang_id_nn_params.cc +57449 -0
  33. data/ext/cld3/lang_id_nn_params.h +178 -0
  34. data/ext/cld3/language_identifier_features.cc +165 -0
  35. data/ext/cld3/language_identifier_features.h +116 -0
  36. data/ext/cld3/nnet_language_identifier.cc +380 -0
  37. data/ext/cld3/nnet_language_identifier.h +175 -0
  38. data/ext/cld3/nnet_language_identifier_c.cc +72 -0
  39. data/ext/cld3/offsetmap.cc +478 -0
  40. data/ext/cld3/offsetmap.h +168 -0
  41. data/ext/cld3/port.h +143 -0
  42. data/ext/cld3/registry.cc +28 -0
  43. data/ext/cld3/registry.h +242 -0
  44. data/ext/cld3/relevant_script_feature.cc +89 -0
  45. data/ext/cld3/relevant_script_feature.h +49 -0
  46. data/ext/cld3/script_detector.h +156 -0
  47. data/ext/cld3/sentence.proto +77 -0
  48. data/ext/cld3/sentence_features.cc +29 -0
  49. data/ext/cld3/sentence_features.h +35 -0
  50. data/ext/cld3/simple_adder.h +72 -0
  51. data/ext/cld3/stringpiece.h +81 -0
  52. data/ext/cld3/task_context.cc +161 -0
  53. data/ext/cld3/task_context.h +81 -0
  54. data/ext/cld3/task_context_params.cc +74 -0
  55. data/ext/cld3/task_context_params.h +54 -0
  56. data/ext/cld3/task_spec.proto +98 -0
  57. data/ext/cld3/text_processing.cc +245 -0
  58. data/ext/cld3/text_processing.h +30 -0
  59. data/ext/cld3/unicodetext.cc +96 -0
  60. data/ext/cld3/unicodetext.h +144 -0
  61. data/ext/cld3/utf8acceptinterchange.h +486 -0
  62. data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
  63. data/ext/cld3/utf8repl_lettermarklower.h +758 -0
  64. data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
  65. data/ext/cld3/utf8statetable.cc +1344 -0
  66. data/ext/cld3/utf8statetable.h +285 -0
  67. data/ext/cld3/utils.cc +241 -0
  68. data/ext/cld3/utils.h +144 -0
  69. data/ext/cld3/workspace.cc +64 -0
  70. data/ext/cld3/workspace.h +177 -0
  71. data/lib/cld3.rb +99 -0
  72. metadata +158 -0
@@ -0,0 +1,168 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ //
16
+ // Author: dsites@google.com (Dick Sites)
17
+ //
18
+
19
+ #ifndef SCRIPT_SPAN_OFFSETMAP_H_
20
+ #define SCRIPT_SPAN_OFFSETMAP_H_
21
+
22
+ #include <string> // for string
23
+
24
+ #include "integral_types.h" // for uint32
25
+
26
+ // ***************************** OffsetMap **************************
27
+ //
28
+ // An OffsetMap object is a container for a mapping from offsets in one text
29
+ // buffer A' to offsets in another text buffer A. It is most useful when A' is
30
+ // built from A via substitutions that occasionally do not preserve byte length.
31
+ //
32
+ // A series of operators are used to build the correspondence map, then
33
+ // calls can be made to map an offset in A' to an offset in A, or vice versa.
34
+ // The map starts with offset 0 in A corresponding to offset 0 in A'.
35
+ // The mapping is then built sequentially, adding on byte ranges that are
36
+ // identical in A and A', byte ranges that are inserted in A', and byte ranges
37
+ // that are deleted from A. All bytes beyond those specified when building the
38
+ // map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
39
+ // end of the map.
40
+ //
41
+ // The internal data structure records positions at which bytes are added or
42
+ // deleted. Using the map is O(1) when increasing the A' or A offset
43
+ // monotonically, and O(n) when accessing random offsets, where n is the
44
+ // number of differences.
45
+ //
46
+
47
+ namespace chrome_lang_id {
48
+ namespace CLD2 {
49
+
50
+ class OffsetMap {
51
+ public:
52
+ // Constructor, destructor
53
+ OffsetMap();
54
+ ~OffsetMap();
55
+
56
+ // Clear the map
57
+ void Clear();
58
+
59
+ // Add to mapping from A to A', specifying how many next bytes correspond
60
+ // in A and A'
61
+ void Copy(int bytes);
62
+
63
+ // Add to mapping from A to A', specifying how many next bytes are
64
+ // inserted in A' while not advancing in A at all
65
+ void Insert(int bytes);
66
+
67
+ // Add to mapping from A to A', specifying how many next bytes are
68
+ // deleted from A while not advancing in A' at all
69
+ void Delete(int bytes);
70
+
71
+ // [Finish building map,] Re-position to offset 0
72
+ // This call is optional; MapForward and MapBack finish building the map
73
+ // if necessary
74
+ void Reset();
75
+
76
+ // Map an offset in A' to the corresponding offset in A
77
+ int MapBack(int aprimeoffset);
78
+
79
+ // Map an offset in A to the corresponding offset in A'
80
+ int MapForward(int aoffset);
81
+
82
+ // h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
83
+ // from A' to A'' and h is from A to A''.
84
+ //
85
+ // Note that g->MapForward(f->MapForward(aoffset)) always equals
86
+ // h->MapForward(aoffset), while
87
+ // f->MapBack(g->MapBack(aprimeprimeoffset)) doesn't always equal
88
+ // h->MapBack(aprimeprimeoffset). This happens when deletion in
89
+ // f and insertion in g are at the same place. For example,
90
+ //
91
+ // A 1 2 3 4
92
+ // ^ | ^ ^
93
+ // | | / | f
94
+ // v vv v
95
+ // A' 1' 2' 3'
96
+ // ^ ^^ ^
97
+ // | | \ | g
98
+ // v | v v
99
+ // A'' 1'' 2'' 3'' 4''
100
+ //
101
+ // results in:
102
+ //
103
+ // A 1 2 3 4
104
+ // ^ ^\ ^ ^
105
+ // | | \ | | h
106
+ // v | vv v
107
+ // A'' 1'' 2'' 3'' 4''
108
+ //
109
+ // 2'' is mapped to 3 in the former figure, while 2'' is mapped to 2 in
110
+ // the latter figure.
111
+ static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
112
+
113
+ // For testing only -- force a mapping
114
+ void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
115
+
116
+ private:
117
+ enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
118
+
119
+ void Flush();
120
+ void FlushAll();
121
+ void MaybeFlushAll();
122
+ void Emit(MapOp op, int len);
123
+
124
+ void SetLeft();
125
+ void SetRight();
126
+
127
+ // Back up over previous range, 1..5 bytes
128
+ // Return subscript at the beginning of that. Pins at 0
129
+ int Backup(int sub);
130
+
131
+ // Parse next range, 1..5 bytes
132
+ // Return subscript just off the end of that
133
+ int ParseNext(int sub, MapOp* op, int* length);
134
+
135
+ // Parse previous range, 1..5 bytes
136
+ // Return current subscript
137
+ int ParsePrevious(int sub, MapOp* op, int* length);
138
+
139
+ bool MoveRight(); // Returns true if OK
140
+ bool MoveLeft(); // Returns true if OK
141
+
142
+ // Copies insert operations from source to dest. Returns true if no
143
+ // other operations are found.
144
+ static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
145
+
146
+ // Copies delete operations from source to dest. Returns true if no other
147
+ // operations are found.
148
+ static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
149
+
150
+ std::string diffs_;
151
+ MapOp pending_op_;
152
+ uint32 pending_length_;
153
+
154
+ // Offsets in the ranges below correspond to each other, with A' = A + diff
155
+ int next_diff_sub_;
156
+ int current_lo_aoffset_;
157
+ int current_hi_aoffset_;
158
+ int current_lo_aprimeoffset_;
159
+ int current_hi_aprimeoffset_;
160
+ int current_diff_;
161
+ int max_aoffset_;
162
+ int max_aprimeoffset_;
163
+ };
164
+
165
+ } // namespace CLD2
166
+ } // namespace chrome_lang_id
167
+
168
+ #endif // SCRIPT_SPAN_OFFSETMAP_H_
@@ -0,0 +1,143 @@
1
+ // Copyright 2013 Google Inc. All Rights Reserved.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ //
16
+ // These are weird things we need to do to get this compiling on
17
+ // random systems [subset].
18
+
19
+ #ifndef SCRIPT_SPAN_PORT_H_
20
+ #define SCRIPT_SPAN_PORT_H_
21
+
22
+ #include <string.h> // for memcpy()
23
+
24
+ #include "integral_types.h"
25
+
26
+ namespace chrome_lang_id {
27
+ namespace CLD2 {
28
+
29
+ // Portable handling of unaligned loads, stores, and copies.
30
+ // On some platforms, like ARM, the copy functions can be more efficient
31
+ // than a load and a store.
32
+
33
+ #if defined(ARCH_PIII) || defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
34
+
35
+ // x86 and x86-64 can perform unaligned loads/stores directly;
36
+ // modern PowerPC hardware can also do unaligned integer loads and stores;
37
+ // but note: the FPU still sends unaligned loads and stores to a trap handler!
38
+
39
+ #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
40
+ #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
41
+ #define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
42
+
43
+ #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
44
+ #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
45
+ #define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
46
+
47
+ #elif defined(__arm__) && \
48
+ !defined(__ARM_ARCH_5__) && \
49
+ !defined(__ARM_ARCH_5T__) && \
50
+ !defined(__ARM_ARCH_5TE__) && \
51
+ !defined(__ARM_ARCH_5TEJ__) && \
52
+ !defined(__ARM_ARCH_6__) && \
53
+ !defined(__ARM_ARCH_6J__) && \
54
+ !defined(__ARM_ARCH_6K__) && \
55
+ !defined(__ARM_ARCH_6Z__) && \
56
+ !defined(__ARM_ARCH_6ZK__) && \
57
+ !defined(__ARM_ARCH_6T2__) && \
58
+ !defined(__ARM_ARCH_7__) && \
59
+ !defined(__ARM_ARCH_7A__) && \
60
+ !defined(__ARM_ARCH_7M__) && \
61
+ !defined(__ARM_ARCH_7R__) && \
62
+ !defined(__ARM_ARCH_8__) && \
63
+ !defined(__ARM_ARCH_8A__)
64
+
65
+ // ARMv7 and newer support native unaligned accesses, but only of 16-bit
66
+ // and 32-bit values (not 64-bit); older versions either raise a fatal signal,
67
+ // do an unaligned read and rotate the words around a bit, or do the reads very
68
+ // slowly (trip through kernel mode). There's no simple #define that says just
69
+ // “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
70
+ // sub-architectures. Newer gcc (>= 4.6) sets an __ARM_FEATURE_UNALIGNED #define,
71
+ // so in time, maybe we can move on to that.
72
+ //
73
+ // Note that even if a chipset supports unaligned access, it might not be
74
+ // enabled in any given system, e.g.:
75
+ // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491c/CIHCGCFD.html
76
+ // Therefore, it's generally just not safe to allow unaligned access on any ARM
77
+ // variant.
78
+ //
79
+ // This is a mess, but there's not much we can do about it.
80
+
81
+ #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
82
+ #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
83
+
84
+ #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
85
+ #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
86
+
87
+ // TODO(sesse): NEON supports unaligned 64-bit loads and stores.
88
+ // See if that would be more efficient on platforms supporting it,
89
+ // at least for copies.
90
+
91
+ inline uint64 UNALIGNED_LOAD64(const void *p) {
92
+ uint64 t;
93
+ memcpy(&t, p, sizeof t);
94
+ return t;
95
+ }
96
+
97
+ inline void UNALIGNED_STORE64(void *p, uint64 v) {
98
+ memcpy(p, &v, sizeof v);
99
+ }
100
+
101
+ #else
102
+
103
+ #define NEED_ALIGNED_LOADS
104
+
105
+ // These functions are provided for architectures that don't support
106
+ // unaligned loads and stores.
107
+
108
+ inline uint16 UNALIGNED_LOAD16(const void *p) {
109
+ uint16 t;
110
+ memcpy(&t, p, sizeof t);
111
+ return t;
112
+ }
113
+
114
+ inline uint32 UNALIGNED_LOAD32(const void *p) {
115
+ uint32 t;
116
+ memcpy(&t, p, sizeof t);
117
+ return t;
118
+ }
119
+
120
+ inline uint64 UNALIGNED_LOAD64(const void *p) {
121
+ uint64 t;
122
+ memcpy(&t, p, sizeof t);
123
+ return t;
124
+ }
125
+
126
+ inline void UNALIGNED_STORE16(void *p, uint16 v) {
127
+ memcpy(p, &v, sizeof v);
128
+ }
129
+
130
+ inline void UNALIGNED_STORE32(void *p, uint32 v) {
131
+ memcpy(p, &v, sizeof v);
132
+ }
133
+
134
+ inline void UNALIGNED_STORE64(void *p, uint64 v) {
135
+ memcpy(p, &v, sizeof v);
136
+ }
137
+
138
+ #endif
139
+
140
+ } // End namespace CLD2
141
+ } // End namespace chrome_lang_id
142
+
143
+ #endif // SCRIPT_SPAN_PORT_H_
@@ -0,0 +1,28 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ #include "registry.h"
17
+
18
+ namespace chrome_lang_id {
19
+
20
+ // Global list of all component registries.
21
+ RegistryMetadata *global_registry_list = NULL;
22
+
23
+ void RegistryMetadata::Register(RegistryMetadata *registry) {
24
+ registry->set_link(global_registry_list);
25
+ global_registry_list = registry;
26
+ }
27
+
28
+ } // namespace chrome_lang_id
@@ -0,0 +1,242 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+
16
+ // Registry for component registration. These classes can be used for creating
17
+ // registries of components conforming to the same interface. This is useful for
18
+ // making a component-based architecture where the specific implementation
19
+ // classes can be selected at runtime. There is support for both class-based and
20
+ // instance-based registries.
21
+ //
22
+ // Example:
23
+ // function.h:
24
+ //
25
+ // class Function : public RegisterableInstance<Function> {
26
+ // public:
27
+ // virtual double Evaluate(double x) = 0;
28
+ // };
29
+ //
30
+ // #define REGISTER_FUNCTION(type, component)
31
+ // REGISTER_INSTANCE_COMPONENT(Function, type, component);
32
+ //
33
+ // function.cc:
34
+ //
35
+ // REGISTER_INSTANCE_REGISTRY("function", Function);
36
+ //
37
+ // class Cos : public Function {
38
+ // public:
39
+ // double Evaluate(double x) { return cos(x); }
40
+ // };
41
+ //
42
+ // class Exp : public Function {
43
+ // public:
44
+ // double Evaluate(double x) { return exp(x); }
45
+ // };
46
+ //
47
+ // REGISTER_FUNCTION("cos", Cos);
48
+ // REGISTER_FUNCTION("exp", Exp);
49
+ //
50
+ // Function *f = Function::Lookup("cos");
51
+ // double result = f->Evaluate(arg);
52
+
53
+ #ifndef REGISTRY_H_
54
+ #define REGISTRY_H_
55
+
56
+ #include <string.h>
57
+
58
+ #include <string>
59
+
60
+ #include "base.h"
61
+
62
+ namespace chrome_lang_id {
63
+
64
+ // Component metadata with information about name, class, and code location.
65
+ class ComponentMetadata {
66
+ public:
67
+ ComponentMetadata(const char *name, const char *class_name, const char *file,
68
+ int line)
69
+ : name_(name),
70
+ class_name_(class_name),
71
+ file_(file),
72
+ line_(line),
73
+ link_(NULL) {}
74
+
75
+ // Getters.
76
+ const char *name() const { return name_; }
77
+ const char *class_name() const { return class_name_; }
78
+ const char *file() const { return file_; }
79
+ int line() const { return line_; }
80
+
81
+ // Metadata objects can be linked in a list.
82
+ ComponentMetadata *link() const { return link_; }
83
+ void set_link(ComponentMetadata *link) { link_ = link; }
84
+
85
+ private:
86
+ // Component name.
87
+ const char *name_;
88
+
89
+ // Name of class for component.
90
+ const char *class_name_;
91
+
92
+ // Code file and location where the component was registered.
93
+ const char *file_;
94
+ int line_;
95
+
96
+ // Link to next metadata object in list.
97
+ ComponentMetadata *link_;
98
+ };
99
+
100
+ // The master registry contains all registered component registries. A registry
101
+ // is not registered in the master registry until the first component of that
102
+ // type is registered.
103
+ class RegistryMetadata : public ComponentMetadata {
104
+ public:
105
+ RegistryMetadata(const char *name, const char *class_name, const char *file,
106
+ int line)
107
+ : ComponentMetadata(name, class_name, file, line) {}
108
+
109
+ // Registers a component registry in the master registry.
110
+ static void Register(RegistryMetadata *registry);
111
+ };
112
+
113
+ // Registry for components. An object can be registered with a type name in the
114
+ // registry. The named instances in the registry can be returned using the
115
+ // Lookup() method. The components in the registry are put into a linked list
116
+ // of components. It is important that the component registry can be statically
117
+ // initialized in order not to depend on initialization order.
118
+ template <class T>
119
+ struct ComponentRegistry {
120
+ typedef ComponentRegistry<T> Self;
121
+
122
+ // Component registration class.
123
+ class Registrar : public ComponentMetadata {
124
+ public:
125
+ // Registers new component by linking itself into the component list of
126
+ // the registry.
127
+ Registrar(Self *registry, const char *type, const char *class_name,
128
+ const char *file, int line, T *object)
129
+ : ComponentMetadata(type, class_name, file, line), object_(object) {
130
+ // Register registry in master registry if this is the first registered
131
+ // component of this type.
132
+ if (registry->components == NULL) {
133
+ RegistryMetadata::Register(
134
+ new RegistryMetadata(registry->name, registry->class_name,
135
+ registry->file, registry->line));
136
+ }
137
+
138
+ // Register component in registry.
139
+ set_link(registry->components);
140
+ registry->components = this;
141
+ }
142
+
143
+ // Returns component type.
144
+ const char *type() const { return name(); }
145
+
146
+ // Returns component object.
147
+ T *object() const { return object_; }
148
+
149
+ // Returns the next component in the component list.
150
+ Registrar *next() const { return static_cast<Registrar *>(link()); }
151
+
152
+ private:
153
+ // Component object.
154
+ T *object_;
155
+ };
156
+
157
+ // Finds registrar for named component in registry.
158
+ const Registrar *GetComponent(const char *type) const {
159
+ Registrar *r = components;
160
+ while (r != NULL && strcmp(type, r->type()) != 0) r = r->next();
161
+ CLD3_DCHECK(r != nullptr);
162
+
163
+ return r;
164
+ }
165
+
166
+ // Finds a named component in the registry.
167
+ T *Lookup(const char *type) const { return GetComponent(type)->object(); }
168
+ T *Lookup(const string &type) const { return Lookup(type.c_str()); }
169
+
170
+ // Textual description of the kind of components in the registry.
171
+ const char *name;
172
+
173
+ // Base class name of component type.
174
+ const char *class_name;
175
+
176
+ // File and line where the registry is defined.
177
+ const char *file;
178
+ int line;
179
+
180
+ // Linked list of registered components.
181
+ Registrar *components;
182
+ };
183
+
184
+ // Base class for registerable class-based components.
185
+ template <class T>
186
+ class RegisterableClass {
187
+ public:
188
+ // Factory function type.
189
+ typedef T *(Factory)();
190
+
191
+ // Registry type.
192
+ typedef ComponentRegistry<Factory> Registry;
193
+
194
+ // Should be called before any call to Create() or registry(), i.e., before
195
+ // using the registration mechanism to register and/or instantiate subclasses
196
+ // of T.
197
+ static void CreateRegistry(
198
+ const char *name,
199
+ const char *class_name,
200
+ const char *file,
201
+ int line) {
202
+ registry_ = new Registry();
203
+ registry_->name = name;
204
+ registry_->class_name = class_name;
205
+ registry_->file = file;
206
+ registry_->line = line;
207
+ registry_->components = nullptr;
208
+ }
209
+
210
+ // Should be called when one is done using the registration mechanism for
211
+ // class T.
212
+ static void DeleteRegistry() {
213
+ delete registry_;
214
+ registry_ = nullptr;
215
+ }
216
+
217
+ // Creates a new component instance.
218
+ static T *Create(const string &type) { return registry()->Lookup(type)(); }
219
+
220
+ // Returns registry for class.
221
+ static Registry *registry() { return registry_; }
222
+
223
+ private:
224
+ // Registry for class.
225
+ static Registry *registry_;
226
+ };
227
+
228
+ // Base class for registerable instance-based components.
229
+ template <class T>
230
+ class RegisterableInstance {
231
+ public:
232
+ // Registry type.
233
+ typedef ComponentRegistry<T> Registry;
234
+
235
+ private:
236
+ // Registry for class.
237
+ static Registry registry_;
238
+ };
239
+
240
+ } // namespace chrome_lang_id
241
+
242
+ #endif // REGISTRY_H_