cld3 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +18 -0
- data/LICENSE +204 -0
- data/LICENSE_CLD3 +203 -0
- data/README.md +22 -0
- data/cld3.gemspec +35 -0
- data/ext/cld3/base.cc +36 -0
- data/ext/cld3/base.h +106 -0
- data/ext/cld3/casts.h +98 -0
- data/ext/cld3/embedding_feature_extractor.cc +51 -0
- data/ext/cld3/embedding_feature_extractor.h +182 -0
- data/ext/cld3/embedding_network.cc +196 -0
- data/ext/cld3/embedding_network.h +186 -0
- data/ext/cld3/embedding_network_params.h +285 -0
- data/ext/cld3/extconf.rb +49 -0
- data/ext/cld3/feature_extractor.cc +137 -0
- data/ext/cld3/feature_extractor.h +633 -0
- data/ext/cld3/feature_extractor.proto +50 -0
- data/ext/cld3/feature_types.cc +72 -0
- data/ext/cld3/feature_types.h +158 -0
- data/ext/cld3/fixunicodevalue.cc +55 -0
- data/ext/cld3/fixunicodevalue.h +69 -0
- data/ext/cld3/float16.h +58 -0
- data/ext/cld3/fml_parser.cc +308 -0
- data/ext/cld3/fml_parser.h +123 -0
- data/ext/cld3/generated_entities.cc +296 -0
- data/ext/cld3/generated_ulscript.cc +678 -0
- data/ext/cld3/generated_ulscript.h +142 -0
- data/ext/cld3/getonescriptspan.cc +1109 -0
- data/ext/cld3/getonescriptspan.h +124 -0
- data/ext/cld3/integral_types.h +37 -0
- data/ext/cld3/lang_id_nn_params.cc +57449 -0
- data/ext/cld3/lang_id_nn_params.h +178 -0
- data/ext/cld3/language_identifier_features.cc +165 -0
- data/ext/cld3/language_identifier_features.h +116 -0
- data/ext/cld3/nnet_language_identifier.cc +380 -0
- data/ext/cld3/nnet_language_identifier.h +175 -0
- data/ext/cld3/nnet_language_identifier_c.cc +72 -0
- data/ext/cld3/offsetmap.cc +478 -0
- data/ext/cld3/offsetmap.h +168 -0
- data/ext/cld3/port.h +143 -0
- data/ext/cld3/registry.cc +28 -0
- data/ext/cld3/registry.h +242 -0
- data/ext/cld3/relevant_script_feature.cc +89 -0
- data/ext/cld3/relevant_script_feature.h +49 -0
- data/ext/cld3/script_detector.h +156 -0
- data/ext/cld3/sentence.proto +77 -0
- data/ext/cld3/sentence_features.cc +29 -0
- data/ext/cld3/sentence_features.h +35 -0
- data/ext/cld3/simple_adder.h +72 -0
- data/ext/cld3/stringpiece.h +81 -0
- data/ext/cld3/task_context.cc +161 -0
- data/ext/cld3/task_context.h +81 -0
- data/ext/cld3/task_context_params.cc +74 -0
- data/ext/cld3/task_context_params.h +54 -0
- data/ext/cld3/task_spec.proto +98 -0
- data/ext/cld3/text_processing.cc +245 -0
- data/ext/cld3/text_processing.h +30 -0
- data/ext/cld3/unicodetext.cc +96 -0
- data/ext/cld3/unicodetext.h +144 -0
- data/ext/cld3/utf8acceptinterchange.h +486 -0
- data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
- data/ext/cld3/utf8repl_lettermarklower.h +758 -0
- data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
- data/ext/cld3/utf8statetable.cc +1344 -0
- data/ext/cld3/utf8statetable.h +285 -0
- data/ext/cld3/utils.cc +241 -0
- data/ext/cld3/utils.h +144 -0
- data/ext/cld3/workspace.cc +64 -0
- data/ext/cld3/workspace.h +177 -0
- data/lib/cld3.rb +99 -0
- metadata +158 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
|
15
|
+
//
|
16
|
+
// Author: dsites@google.com (Dick Sites)
|
17
|
+
//
|
18
|
+
|
19
|
+
#ifndef SCRIPT_SPAN_OFFSETMAP_H_
|
20
|
+
#define SCRIPT_SPAN_OFFSETMAP_H_
|
21
|
+
|
22
|
+
#include <string> // for string
|
23
|
+
|
24
|
+
#include "integral_types.h" // for uint32
|
25
|
+
|
26
|
+
// ***************************** OffsetMap **************************
|
27
|
+
//
|
28
|
+
// An OffsetMap object is a container for a mapping from offsets in one text
|
29
|
+
// buffer A' to offsets in another text buffer A. It is most useful when A' is
|
30
|
+
// built from A via substitutions that occasionally do not preserve byte length.
|
31
|
+
//
|
32
|
+
// A series of operators are used to build the correspondence map, then
|
33
|
+
// calls can be made to map an offset in A' to an offset in A, or vice versa.
|
34
|
+
// The map starts with offset 0 in A corresponding to offset 0 in A'.
|
35
|
+
// The mapping is then built sequentially, adding on byte ranges that are
|
36
|
+
// identical in A and A', byte ranges that are inserted in A', and byte ranges
|
37
|
+
// that are deleted from A. All bytes beyond those specified when building the
|
38
|
+
// map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
|
39
|
+
// end of the map.
|
40
|
+
//
|
41
|
+
// The internal data structure records positions at which bytes are added or
|
42
|
+
// deleted. Using the map is O(1) when increasing the A' or A offset
|
43
|
+
// monotonically, and O(n) when accessing random offsets, where n is the
|
44
|
+
// number of differences.
|
45
|
+
//
|
46
|
+
|
47
|
+
namespace chrome_lang_id {
|
48
|
+
namespace CLD2 {
|
49
|
+
|
50
|
+
class OffsetMap {
|
51
|
+
public:
|
52
|
+
// Constructor, destructor
|
53
|
+
OffsetMap();
|
54
|
+
~OffsetMap();
|
55
|
+
|
56
|
+
// Clear the map
|
57
|
+
void Clear();
|
58
|
+
|
59
|
+
// Add to mapping from A to A', specifying how many next bytes correspond
|
60
|
+
// in A and A'
|
61
|
+
void Copy(int bytes);
|
62
|
+
|
63
|
+
// Add to mapping from A to A', specifying how many next bytes are
|
64
|
+
// inserted in A' while not advancing in A at all
|
65
|
+
void Insert(int bytes);
|
66
|
+
|
67
|
+
// Add to mapping from A to A', specifying how many next bytes are
|
68
|
+
// deleted from A while not advancing in A' at all
|
69
|
+
void Delete(int bytes);
|
70
|
+
|
71
|
+
// [Finish building map,] Re-position to offset 0
|
72
|
+
// This call is optional; MapForward and MapBack finish building the map
|
73
|
+
// if necessary
|
74
|
+
void Reset();
|
75
|
+
|
76
|
+
// Map an offset in A' to the corresponding offset in A
|
77
|
+
int MapBack(int aprimeoffset);
|
78
|
+
|
79
|
+
// Map an offset in A to the corresponding offset in A'
|
80
|
+
int MapForward(int aoffset);
|
81
|
+
|
82
|
+
// h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
|
83
|
+
// from A' to A'' and h is from A to A''.
|
84
|
+
//
|
85
|
+
// Note that g->MoveForward(f->MoveForward(aoffset)) always equals
|
86
|
+
// to h->MoveForward(aoffset), while
|
87
|
+
// f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
|
88
|
+
// to h->MoveBack(aprimeprimeoffset). This happens when deletion in
|
89
|
+
// f and insertion in g are at the same place. For example,
|
90
|
+
//
|
91
|
+
// A 1 2 3 4
|
92
|
+
// ^ | ^ ^
|
93
|
+
// | | / | f
|
94
|
+
// v vv v
|
95
|
+
// A' 1' 2' 3'
|
96
|
+
// ^ ^^ ^
|
97
|
+
// | | \ | g
|
98
|
+
// v | v v
|
99
|
+
// A'' 1'' 2'' 3'' 4''
|
100
|
+
//
|
101
|
+
// results in:
|
102
|
+
//
|
103
|
+
// A 1 2 3 4
|
104
|
+
// ^ ^\ ^ ^
|
105
|
+
// | | \ | | h
|
106
|
+
// v | vv v
|
107
|
+
// A'' 1'' 2'' 3'' 4''
|
108
|
+
//
|
109
|
+
// 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
|
110
|
+
// the latter figure.
|
111
|
+
static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
|
112
|
+
|
113
|
+
// For testing only -- force a mapping
|
114
|
+
void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
|
115
|
+
|
116
|
+
private:
|
117
|
+
enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
|
118
|
+
|
119
|
+
void Flush();
|
120
|
+
void FlushAll();
|
121
|
+
void MaybeFlushAll();
|
122
|
+
void Emit(MapOp op, int len);
|
123
|
+
|
124
|
+
void SetLeft();
|
125
|
+
void SetRight();
|
126
|
+
|
127
|
+
// Back up over previous range, 1..5 bytes
|
128
|
+
// Return subscript at the beginning of that. Pins at 0
|
129
|
+
int Backup(int sub);
|
130
|
+
|
131
|
+
// Parse next range, 1..5 bytes
|
132
|
+
// Return subscript just off the end of that
|
133
|
+
int ParseNext(int sub, MapOp* op, int* length);
|
134
|
+
|
135
|
+
// Parse previous range, 1..5 bytes
|
136
|
+
// Return current subscript
|
137
|
+
int ParsePrevious(int sub, MapOp* op, int* length);
|
138
|
+
|
139
|
+
bool MoveRight(); // Returns true if OK
|
140
|
+
bool MoveLeft(); // Returns true if OK
|
141
|
+
|
142
|
+
// Copies insert operations from source to dest. Returns true if no
|
143
|
+
// other operations are found.
|
144
|
+
static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
|
145
|
+
|
146
|
+
// Copies delete operations from source to dest. Returns true if no other
|
147
|
+
// operations are found.
|
148
|
+
static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
|
149
|
+
|
150
|
+
std::string diffs_;
|
151
|
+
MapOp pending_op_;
|
152
|
+
uint32 pending_length_;
|
153
|
+
|
154
|
+
// Offsets in the ranges below correspond to each other, with A' = A + diff
|
155
|
+
int next_diff_sub_;
|
156
|
+
int current_lo_aoffset_;
|
157
|
+
int current_hi_aoffset_;
|
158
|
+
int current_lo_aprimeoffset_;
|
159
|
+
int current_hi_aprimeoffset_;
|
160
|
+
int current_diff_;
|
161
|
+
int max_aoffset_;
|
162
|
+
int max_aprimeoffset_;
|
163
|
+
};
|
164
|
+
|
165
|
+
} // namespace CLD2
|
166
|
+
} // namespace chrome_lang_id
|
167
|
+
|
168
|
+
#endif // SCRIPT_SPAN_OFFSETMAP_H_
|
data/ext/cld3/port.h
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
|
15
|
+
//
|
16
|
+
// These are weird things we need to do to get this compiling on
|
17
|
+
// random systems [subset].
|
18
|
+
|
19
|
+
#ifndef SCRIPT_SPAN_PORT_H_
|
20
|
+
#define SCRIPT_SPAN_PORT_H_
|
21
|
+
|
22
|
+
#include <string.h> // for memcpy()
|
23
|
+
|
24
|
+
#include "integral_types.h"
|
25
|
+
|
26
|
+
namespace chrome_lang_id {
|
27
|
+
namespace CLD2 {
|
28
|
+
|
29
|
+
// Portable handling of unaligned loads, stores, and copies.
|
30
|
+
// On some platforms, like ARM, the copy functions can be more efficient
|
31
|
+
// than a load and a store.
|
32
|
+
|
33
|
+
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
|
34
|
+
|
35
|
+
// x86 and x86-64 can perform unaligned loads/stores directly;
|
36
|
+
// modern PowerPC hardware can also do unaligned integer loads and stores;
|
37
|
+
// but note: the FPU still sends unaligned loads and stores to a trap handler!
|
38
|
+
|
39
|
+
#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
|
40
|
+
#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
|
41
|
+
#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
|
42
|
+
|
43
|
+
#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
|
44
|
+
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
|
45
|
+
#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
|
46
|
+
|
47
|
+
#elif defined(__arm__) && \
|
48
|
+
!defined(__ARM_ARCH_5__) && \
|
49
|
+
!defined(__ARM_ARCH_5T__) && \
|
50
|
+
!defined(__ARM_ARCH_5TE__) && \
|
51
|
+
!defined(__ARM_ARCH_5TEJ__) && \
|
52
|
+
!defined(__ARM_ARCH_6__) && \
|
53
|
+
!defined(__ARM_ARCH_6J__) && \
|
54
|
+
!defined(__ARM_ARCH_6K__) && \
|
55
|
+
!defined(__ARM_ARCH_6Z__) && \
|
56
|
+
!defined(__ARM_ARCH_6ZK__) && \
|
57
|
+
!defined(__ARM_ARCH_6T2__) && \
|
58
|
+
!defined(__ARM_ARCH_7__) && \
|
59
|
+
!defined(__ARM_ARCH_7A__) && \
|
60
|
+
!defined(__ARM_ARCH_7M__) && \
|
61
|
+
!defined(__ARM_ARCH_7R__) && \
|
62
|
+
!defined(__ARM_ARCH_8__) && \
|
63
|
+
!defined(__ARM_ARCH_8A__)
|
64
|
+
|
65
|
+
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
|
66
|
+
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
|
67
|
+
// do an unaligned read and rotate the words around a bit, or do the reads very
|
68
|
+
// slowly (trip through kernel mode). There's no simple #define that says just
|
69
|
+
// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
|
70
|
+
// sub-architectures. Newer gcc (>= 4.6) set an __ARM_FEATURE_UNALIGNED #define,
|
71
|
+
// so in time, maybe we can move on to that.
|
72
|
+
//
|
73
|
+
// Note that even if a chipset supports unaligned access, it might not be
|
74
|
+
// enabled in any given system, e.g.:
|
75
|
+
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491c/CIHCGCFD.html
|
76
|
+
// Therefore, it's generally just not safe to allow unaligned access on any ARM
|
77
|
+
// variant.
|
78
|
+
//
|
79
|
+
// This is a mess, but there's not much we can do about it.
|
80
|
+
|
81
|
+
#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
|
82
|
+
#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
|
83
|
+
|
84
|
+
#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
|
85
|
+
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
|
86
|
+
|
87
|
+
// TODO(sesse): NEON supports unaligned 64-bit loads and stores.
|
88
|
+
// See if that would be more efficient on platforms supporting it,
|
89
|
+
// at least for copies.
|
90
|
+
|
91
|
+
// Reads a uint64 from `p` by copying its bytes into a local, so `p` does not
// have to be suitably aligned for a direct 64-bit load.
inline uint64 UNALIGNED_LOAD64(const void *p) {
  uint64 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
96
|
+
|
97
|
+
inline void UNALIGNED_STORE64(void *p, uint64 v) {
|
98
|
+
memcpy(p, &v, sizeof v);
|
99
|
+
}
|
100
|
+
|
101
|
+
#else
|
102
|
+
|
103
|
+
#define NEED_ALIGNED_LOADS
|
104
|
+
|
105
|
+
// These functions are provided for architectures that don't support
|
106
|
+
// unaligned loads and stores.
|
107
|
+
|
108
|
+
// Reads a uint16 from `p` by copying its bytes into a local, so `p` does not
// have to be suitably aligned for a direct 16-bit load.
inline uint16 UNALIGNED_LOAD16(const void *p) {
  uint16 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
113
|
+
|
114
|
+
// Reads a uint32 from `p` by copying its bytes into a local, so `p` does not
// have to be suitably aligned for a direct 32-bit load.
inline uint32 UNALIGNED_LOAD32(const void *p) {
  uint32 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
119
|
+
|
120
|
+
// Reads a uint64 from `p` by copying its bytes into a local, so `p` does not
// have to be suitably aligned for a direct 64-bit load.
inline uint64 UNALIGNED_LOAD64(const void *p) {
  uint64 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
125
|
+
|
126
|
+
inline void UNALIGNED_STORE16(void *p, uint16 v) {
|
127
|
+
memcpy(p, &v, sizeof v);
|
128
|
+
}
|
129
|
+
|
130
|
+
inline void UNALIGNED_STORE32(void *p, uint32 v) {
|
131
|
+
memcpy(p, &v, sizeof v);
|
132
|
+
}
|
133
|
+
|
134
|
+
inline void UNALIGNED_STORE64(void *p, uint64 v) {
|
135
|
+
memcpy(p, &v, sizeof v);
|
136
|
+
}
|
137
|
+
|
138
|
+
#endif
|
139
|
+
|
140
|
+
} // End namespace CLD2
|
141
|
+
} // End namespace chrome_lang_id
|
142
|
+
|
143
|
+
#endif // SCRIPT_SPAN_PORT_H_
|
@@ -0,0 +1,28 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#include "registry.h"
|
17
|
+
|
18
|
+
namespace chrome_lang_id {
|
19
|
+
|
20
|
+
// Global list of all component registries.
|
21
|
+
RegistryMetadata *global_registry_list = NULL;
|
22
|
+
|
23
|
+
void RegistryMetadata::Register(RegistryMetadata *registry) {
|
24
|
+
registry->set_link(global_registry_list);
|
25
|
+
global_registry_list = registry;
|
26
|
+
}
|
27
|
+
|
28
|
+
} // namespace chrome_lang_id
|
data/ext/cld3/registry.h
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
// Registry for component registration. These classes can be used for creating
|
17
|
+
// registries of components conforming to the same interface. This is useful for
|
18
|
+
// making a component-based architecture where the specific implementation
|
19
|
+
// classes can be selected at runtime. There is support for both class-based and
|
20
|
+
// instance based registries.
|
21
|
+
//
|
22
|
+
// Example:
|
23
|
+
// function.h:
|
24
|
+
//
|
25
|
+
// class Function : public RegisterableInstance<Function> {
|
26
|
+
// public:
|
27
|
+
// virtual double Evaluate(double x) = 0;
|
28
|
+
// };
|
29
|
+
//
|
30
|
+
// #define REGISTER_FUNCTION(type, component)
|
31
|
+
// REGISTER_INSTANCE_COMPONENT(Function, type, component);
|
32
|
+
//
|
33
|
+
// function.cc:
|
34
|
+
//
|
35
|
+
// REGISTER_INSTANCE_REGISTRY("function", Function);
|
36
|
+
//
|
37
|
+
// class Cos : public Function {
|
38
|
+
// public:
|
39
|
+
// double Evaluate(double x) { return cos(x); }
|
40
|
+
// };
|
41
|
+
//
|
42
|
+
// class Exp : public Function {
|
43
|
+
// public:
|
44
|
+
// double Evaluate(double x) { return exp(x); }
|
45
|
+
// };
|
46
|
+
//
|
47
|
+
// REGISTER_FUNCTION("cos", Cos);
|
48
|
+
// REGISTER_FUNCTION("exp", Exp);
|
49
|
+
//
|
50
|
+
// Function *f = Function::Lookup("cos");
|
51
|
+
// double result = f->Evaluate(arg);
|
52
|
+
|
53
|
+
#ifndef REGISTRY_H_
|
54
|
+
#define REGISTRY_H_
|
55
|
+
|
56
|
+
#include <string.h>
|
57
|
+
|
58
|
+
#include <string>
|
59
|
+
|
60
|
+
#include "base.h"
|
61
|
+
|
62
|
+
namespace chrome_lang_id {
|
63
|
+
|
64
|
+
// Component metadata with information about name, class, and code location.
// Objects of this type can be chained into a singly linked list through
// link() / set_link().
class ComponentMetadata {
 public:
  // Records the component name, the name of its class, and the file/line
  // where it was registered. The strings are stored by pointer and are NOT
  // copied, so they must outlive this object. A new object starts unlinked.
  ComponentMetadata(const char *name, const char *class_name, const char *file,
                    int line)
      : name_(name),
        class_name_(class_name),
        file_(file),
        line_(line),
        link_(nullptr) {}

  // Getters.
  const char *name() const { return name_; }
  const char *class_name() const { return class_name_; }
  const char *file() const { return file_; }
  int line() const { return line_; }

  // Metadata objects can be linked in a list. link() returns the next object
  // in the list, or nullptr if none has been set.
  ComponentMetadata *link() const { return link_; }
  void set_link(ComponentMetadata *link) { link_ = link; }

 private:
  // Component name.
  const char *name_;

  // Name of class for component.
  const char *class_name_;

  // Code file and location where the component was registered.
  const char *file_;
  int line_;

  // Link to next metadata object in list.
  ComponentMetadata *link_;
};
|
99
|
+
|
100
|
+
// The master registry contains all registered component registries. A registry
|
101
|
+
// is not registered in the master registry until the first component of that
|
102
|
+
// type is registered.
|
103
|
+
class RegistryMetadata : public ComponentMetadata {
|
104
|
+
public:
|
105
|
+
RegistryMetadata(const char *name, const char *class_name, const char *file,
|
106
|
+
int line)
|
107
|
+
: ComponentMetadata(name, class_name, file, line) {}
|
108
|
+
|
109
|
+
// Registers a component registry in the master registry.
|
110
|
+
static void Register(RegistryMetadata *registry);
|
111
|
+
};
|
112
|
+
|
113
|
+
// Registry for components. An object can be registered with a type name in the
|
114
|
+
// registry. The named instances in the registry can be returned using the
|
115
|
+
// Lookup() method. The components in the registry are put into a linked list
|
116
|
+
// of components. It is important that the component registry can be statically
|
117
|
+
// initialized in order not to depend on initialization order.
|
118
|
+
template <class T>
|
119
|
+
struct ComponentRegistry {
|
120
|
+
typedef ComponentRegistry<T> Self;
|
121
|
+
|
122
|
+
// Component registration class.
|
123
|
+
class Registrar : public ComponentMetadata {
|
124
|
+
public:
|
125
|
+
// Registers new component by linking itself into the component list of
|
126
|
+
// the registry.
|
127
|
+
Registrar(Self *registry, const char *type, const char *class_name,
|
128
|
+
const char *file, int line, T *object)
|
129
|
+
: ComponentMetadata(type, class_name, file, line), object_(object) {
|
130
|
+
// Register registry in master registry if this is the first registered
|
131
|
+
// component of this type.
|
132
|
+
if (registry->components == NULL) {
|
133
|
+
RegistryMetadata::Register(
|
134
|
+
new RegistryMetadata(registry->name, registry->class_name,
|
135
|
+
registry->file, registry->line));
|
136
|
+
}
|
137
|
+
|
138
|
+
// Register component in registry.
|
139
|
+
set_link(registry->components);
|
140
|
+
registry->components = this;
|
141
|
+
}
|
142
|
+
|
143
|
+
// Returns component type.
|
144
|
+
const char *type() const { return name(); }
|
145
|
+
|
146
|
+
// Returns component object.
|
147
|
+
T *object() const { return object_; }
|
148
|
+
|
149
|
+
// Returns the next component in the component list.
|
150
|
+
Registrar *next() const { return static_cast<Registrar *>(link()); }
|
151
|
+
|
152
|
+
private:
|
153
|
+
// Component object.
|
154
|
+
T *object_;
|
155
|
+
};
|
156
|
+
|
157
|
+
// Finds registrar for named component in registry.
|
158
|
+
const Registrar *GetComponent(const char *type) const {
|
159
|
+
Registrar *r = components;
|
160
|
+
while (r != NULL && strcmp(type, r->type()) != 0) r = r->next();
|
161
|
+
CLD3_DCHECK(r != nullptr);
|
162
|
+
|
163
|
+
return r;
|
164
|
+
}
|
165
|
+
|
166
|
+
// Finds a named component in the registry.
|
167
|
+
T *Lookup(const char *type) const { return GetComponent(type)->object(); }
|
168
|
+
T *Lookup(const string &type) const { return Lookup(type.c_str()); }
|
169
|
+
|
170
|
+
// Textual description of the kind of components in the registry.
|
171
|
+
const char *name;
|
172
|
+
|
173
|
+
// Base class name of component type.
|
174
|
+
const char *class_name;
|
175
|
+
|
176
|
+
// File and line where the registry is defined.
|
177
|
+
const char *file;
|
178
|
+
int line;
|
179
|
+
|
180
|
+
// Linked list of registered components.
|
181
|
+
Registrar *components;
|
182
|
+
};
|
183
|
+
|
184
|
+
// Base class for registerable class-based components.
|
185
|
+
template <class T>
|
186
|
+
class RegisterableClass {
|
187
|
+
public:
|
188
|
+
// Factory function type.
|
189
|
+
typedef T *(Factory)();
|
190
|
+
|
191
|
+
// Registry type.
|
192
|
+
typedef ComponentRegistry<Factory> Registry;
|
193
|
+
|
194
|
+
// Should be called before any call to Create() or registry(), i.e., before
|
195
|
+
// using the registration mechanism to register and or instantiate subclasses
|
196
|
+
// of T.
|
197
|
+
static void CreateRegistry(
|
198
|
+
const char *name,
|
199
|
+
const char *class_name,
|
200
|
+
const char *file,
|
201
|
+
int line) {
|
202
|
+
registry_ = new Registry();
|
203
|
+
registry_->name = name;
|
204
|
+
registry_->class_name = class_name;
|
205
|
+
registry_->file = file;
|
206
|
+
registry_->line = line;
|
207
|
+
registry_->components = nullptr;
|
208
|
+
}
|
209
|
+
|
210
|
+
// Should be called when one is done using the registration mechanism for
|
211
|
+
// class T.
|
212
|
+
static void DeleteRegistry() {
|
213
|
+
delete registry_;
|
214
|
+
registry_ = nullptr;
|
215
|
+
}
|
216
|
+
|
217
|
+
// Creates a new component instance.
|
218
|
+
static T *Create(const string &type) { return registry()->Lookup(type)(); }
|
219
|
+
|
220
|
+
// Returns registry for class.
|
221
|
+
static Registry *registry() { return registry_; }
|
222
|
+
|
223
|
+
private:
|
224
|
+
// Registry for class.
|
225
|
+
static Registry *registry_;
|
226
|
+
};
|
227
|
+
|
228
|
+
// Base class for registerable instance-based components.
|
229
|
+
template <class T>
|
230
|
+
class RegisterableInstance {
|
231
|
+
public:
|
232
|
+
// Registry type.
|
233
|
+
typedef ComponentRegistry<T> Registry;
|
234
|
+
|
235
|
+
private:
|
236
|
+
// Registry for class.
|
237
|
+
static Registry registry_;
|
238
|
+
};
|
239
|
+
|
240
|
+
} // namespace chrome_lang_id
|
241
|
+
|
242
|
+
#endif // REGISTRY_H_
|