cld3 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +18 -0
- data/LICENSE +204 -0
- data/LICENSE_CLD3 +203 -0
- data/README.md +22 -0
- data/cld3.gemspec +35 -0
- data/ext/cld3/base.cc +36 -0
- data/ext/cld3/base.h +106 -0
- data/ext/cld3/casts.h +98 -0
- data/ext/cld3/embedding_feature_extractor.cc +51 -0
- data/ext/cld3/embedding_feature_extractor.h +182 -0
- data/ext/cld3/embedding_network.cc +196 -0
- data/ext/cld3/embedding_network.h +186 -0
- data/ext/cld3/embedding_network_params.h +285 -0
- data/ext/cld3/extconf.rb +49 -0
- data/ext/cld3/feature_extractor.cc +137 -0
- data/ext/cld3/feature_extractor.h +633 -0
- data/ext/cld3/feature_extractor.proto +50 -0
- data/ext/cld3/feature_types.cc +72 -0
- data/ext/cld3/feature_types.h +158 -0
- data/ext/cld3/fixunicodevalue.cc +55 -0
- data/ext/cld3/fixunicodevalue.h +69 -0
- data/ext/cld3/float16.h +58 -0
- data/ext/cld3/fml_parser.cc +308 -0
- data/ext/cld3/fml_parser.h +123 -0
- data/ext/cld3/generated_entities.cc +296 -0
- data/ext/cld3/generated_ulscript.cc +678 -0
- data/ext/cld3/generated_ulscript.h +142 -0
- data/ext/cld3/getonescriptspan.cc +1109 -0
- data/ext/cld3/getonescriptspan.h +124 -0
- data/ext/cld3/integral_types.h +37 -0
- data/ext/cld3/lang_id_nn_params.cc +57449 -0
- data/ext/cld3/lang_id_nn_params.h +178 -0
- data/ext/cld3/language_identifier_features.cc +165 -0
- data/ext/cld3/language_identifier_features.h +116 -0
- data/ext/cld3/nnet_language_identifier.cc +380 -0
- data/ext/cld3/nnet_language_identifier.h +175 -0
- data/ext/cld3/nnet_language_identifier_c.cc +72 -0
- data/ext/cld3/offsetmap.cc +478 -0
- data/ext/cld3/offsetmap.h +168 -0
- data/ext/cld3/port.h +143 -0
- data/ext/cld3/registry.cc +28 -0
- data/ext/cld3/registry.h +242 -0
- data/ext/cld3/relevant_script_feature.cc +89 -0
- data/ext/cld3/relevant_script_feature.h +49 -0
- data/ext/cld3/script_detector.h +156 -0
- data/ext/cld3/sentence.proto +77 -0
- data/ext/cld3/sentence_features.cc +29 -0
- data/ext/cld3/sentence_features.h +35 -0
- data/ext/cld3/simple_adder.h +72 -0
- data/ext/cld3/stringpiece.h +81 -0
- data/ext/cld3/task_context.cc +161 -0
- data/ext/cld3/task_context.h +81 -0
- data/ext/cld3/task_context_params.cc +74 -0
- data/ext/cld3/task_context_params.h +54 -0
- data/ext/cld3/task_spec.proto +98 -0
- data/ext/cld3/text_processing.cc +245 -0
- data/ext/cld3/text_processing.h +30 -0
- data/ext/cld3/unicodetext.cc +96 -0
- data/ext/cld3/unicodetext.h +144 -0
- data/ext/cld3/utf8acceptinterchange.h +486 -0
- data/ext/cld3/utf8prop_lettermarkscriptnum.h +1631 -0
- data/ext/cld3/utf8repl_lettermarklower.h +758 -0
- data/ext/cld3/utf8scannot_lettermarkspecial.h +1455 -0
- data/ext/cld3/utf8statetable.cc +1344 -0
- data/ext/cld3/utf8statetable.h +285 -0
- data/ext/cld3/utils.cc +241 -0
- data/ext/cld3/utils.h +144 -0
- data/ext/cld3/workspace.cc +64 -0
- data/ext/cld3/workspace.h +177 -0
- data/lib/cld3.rb +99 -0
- metadata +158 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
|
15
|
+
//
|
16
|
+
// Author: dsites@google.com (Dick Sites)
|
17
|
+
//
|
18
|
+
|
19
|
+
#ifndef SCRIPT_SPAN_OFFSETMAP_H_
|
20
|
+
#define SCRIPT_SPAN_OFFSETMAP_H_
|
21
|
+
|
22
|
+
#include <string> // for string
|
23
|
+
|
24
|
+
#include "integral_types.h" // for uint32
|
25
|
+
|
26
|
+
// ***************************** OffsetMap **************************
|
27
|
+
//
|
28
|
+
// An OffsetMap object is a container for a mapping from offsets in one text
|
29
|
+
// buffer A' to offsets in another text buffer A. It is most useful when A' is
|
30
|
+
// built from A via substitutions that occasionally do not preserve byte length.
|
31
|
+
//
|
32
|
+
// A series of operators are used to build the correspondence map, then
|
33
|
+
// calls can be made to map an offset in A' to an offset in A, or vice versa.
|
34
|
+
// The map starts with offset 0 in A corresponding to offset 0 in A'.
|
35
|
+
// The mapping is then built sequentially, adding on byte ranges that are
|
36
|
+
// identical in A and A', byte ranges that are inserted in A', and byte ranges
|
37
|
+
// that are deleted from A. All bytes beyond those specified when building the
|
38
|
+
// map are assumed to correspond, i.e. a Copy(infinity) is assumed at the
|
39
|
+
// end of the map.
|
40
|
+
//
|
41
|
+
// The internal data structure records positions at which bytes are added or
|
42
|
+
// deleted. Using the map is O(1) when increasing the A' or A offset
|
43
|
+
// monotonically, and O(n) when accessing random offsets, where n is the
|
44
|
+
// number of differences.
|
45
|
+
//
|
46
|
+
|
47
|
+
namespace chrome_lang_id {
|
48
|
+
namespace CLD2 {
|
49
|
+
|
50
|
+
class OffsetMap {
|
51
|
+
public:
|
52
|
+
// Constructor, destructor
|
53
|
+
OffsetMap();
|
54
|
+
~OffsetMap();
|
55
|
+
|
56
|
+
// Clear the map
|
57
|
+
void Clear();
|
58
|
+
|
59
|
+
// Add to mapping from A to A', specifying how many next bytes correspond
|
60
|
+
// in A and A'
|
61
|
+
void Copy(int bytes);
|
62
|
+
|
63
|
+
// Add to mapping from A to A', specifying how many next bytes are
|
64
|
+
// inserted in A' while not advancing in A at all
|
65
|
+
void Insert(int bytes);
|
66
|
+
|
67
|
+
// Add to mapping from A to A', specifying how many next bytes are
|
68
|
+
// deleted from A while not advancing in A' at all
|
69
|
+
void Delete(int bytes);
|
70
|
+
|
71
|
+
// [Finish building map,] Re-position to offset 0
|
72
|
+
// This call is optional; MapForward and MapBack finish building the map
|
73
|
+
// if necessary
|
74
|
+
void Reset();
|
75
|
+
|
76
|
+
// Map an offset in A' to the corresponding offset in A
|
77
|
+
int MapBack(int aprimeoffset);
|
78
|
+
|
79
|
+
// Map an offset in A to the corresponding offset in A'
|
80
|
+
int MapForward(int aoffset);
|
81
|
+
|
82
|
+
// h = ComposeOffsetMap(g, f), where f is a map from A to A', g is
|
83
|
+
// from A' to A'' and h is from A to A''.
|
84
|
+
//
|
85
|
+
// Note that g->MoveForward(f->MoveForward(aoffset)) always equals
|
86
|
+
// to h->MoveForward(aoffset), while
|
87
|
+
// f->MoveBack(g->MoveBack(aprimeprimeoffset)) doesn't always equals
|
88
|
+
// to h->MoveBack(aprimeprimeoffset). This happens when deletion in
|
89
|
+
// f and insertion in g are at the same place. For example,
|
90
|
+
//
|
91
|
+
// A 1 2 3 4
|
92
|
+
// ^ | ^ ^
|
93
|
+
// | | / | f
|
94
|
+
// v vv v
|
95
|
+
// A' 1' 2' 3'
|
96
|
+
// ^ ^^ ^
|
97
|
+
// | | \ | g
|
98
|
+
// v | v v
|
99
|
+
// A'' 1'' 2'' 3'' 4''
|
100
|
+
//
|
101
|
+
// results in:
|
102
|
+
//
|
103
|
+
// A 1 2 3 4
|
104
|
+
// ^ ^\ ^ ^
|
105
|
+
// | | \ | | h
|
106
|
+
// v | vv v
|
107
|
+
// A'' 1'' 2'' 3'' 4''
|
108
|
+
//
|
109
|
+
// 2'' is mapped 3 in the former figure, while 2'' is mapped to 2 in
|
110
|
+
// the latter figure.
|
111
|
+
static void ComposeOffsetMap(OffsetMap* g, OffsetMap* f, OffsetMap* h);
|
112
|
+
|
113
|
+
// For testing only -- force a mapping
|
114
|
+
void StuffIt(const std::string& diffs, int max_aoffset, int max_aprimeoffset);
|
115
|
+
|
116
|
+
private:
|
117
|
+
enum MapOp {PREFIX_OP, COPY_OP, INSERT_OP, DELETE_OP};
|
118
|
+
|
119
|
+
void Flush();
|
120
|
+
void FlushAll();
|
121
|
+
void MaybeFlushAll();
|
122
|
+
void Emit(MapOp op, int len);
|
123
|
+
|
124
|
+
void SetLeft();
|
125
|
+
void SetRight();
|
126
|
+
|
127
|
+
// Back up over previous range, 1..5 bytes
|
128
|
+
// Return subscript at the beginning of that. Pins at 0
|
129
|
+
int Backup(int sub);
|
130
|
+
|
131
|
+
// Parse next range, 1..5 bytes
|
132
|
+
// Return subscript just off the end of that
|
133
|
+
int ParseNext(int sub, MapOp* op, int* length);
|
134
|
+
|
135
|
+
// Parse previous range, 1..5 bytes
|
136
|
+
// Return current subscript
|
137
|
+
int ParsePrevious(int sub, MapOp* op, int* length);
|
138
|
+
|
139
|
+
bool MoveRight(); // Returns true if OK
|
140
|
+
bool MoveLeft(); // Returns true if OK
|
141
|
+
|
142
|
+
// Copies insert operations from source to dest. Returns true if no
|
143
|
+
// other operations are found.
|
144
|
+
static bool CopyInserts(OffsetMap* source, OffsetMap* dest);
|
145
|
+
|
146
|
+
// Copies delete operations from source to dest. Returns true if no other
|
147
|
+
// operations are found.
|
148
|
+
static bool CopyDeletes(OffsetMap* source, OffsetMap* dest);
|
149
|
+
|
150
|
+
std::string diffs_;
|
151
|
+
MapOp pending_op_;
|
152
|
+
uint32 pending_length_;
|
153
|
+
|
154
|
+
// Offsets in the ranges below correspond to each other, with A' = A + diff
|
155
|
+
int next_diff_sub_;
|
156
|
+
int current_lo_aoffset_;
|
157
|
+
int current_hi_aoffset_;
|
158
|
+
int current_lo_aprimeoffset_;
|
159
|
+
int current_hi_aprimeoffset_;
|
160
|
+
int current_diff_;
|
161
|
+
int max_aoffset_;
|
162
|
+
int max_aprimeoffset_;
|
163
|
+
};
|
164
|
+
|
165
|
+
} // namespace CLD2
|
166
|
+
} // namespace chrome_lang_id
|
167
|
+
|
168
|
+
#endif // SCRIPT_SPAN_OFFSETMAP_H_
|
data/ext/cld3/port.h
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
// Copyright 2013 Google Inc. All Rights Reserved.
|
2
|
+
//
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
// you may not use this file except in compliance with the License.
|
5
|
+
// You may obtain a copy of the License at
|
6
|
+
//
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
//
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
// See the License for the specific language governing permissions and
|
13
|
+
// limitations under the License.
|
14
|
+
|
15
|
+
//
|
16
|
+
// These are weird things we need to do to get this compiling on
|
17
|
+
// random systems [subset].
|
18
|
+
|
19
|
+
#ifndef SCRIPT_SPAN_PORT_H_
|
20
|
+
#define SCRIPT_SPAN_PORT_H_
|
21
|
+
|
22
|
+
#include <string.h> // for memcpy()
|
23
|
+
|
24
|
+
#include "integral_types.h"
|
25
|
+
|
26
|
+
namespace chrome_lang_id {
|
27
|
+
namespace CLD2 {
|
28
|
+
|
29
|
+
// Portable handling of unaligned loads, stores, and copies.
|
30
|
+
// On some platforms, like ARM, the copy functions can be more efficient
|
31
|
+
// than a load and a store.
|
32
|
+
|
33
|
+
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
|
34
|
+
|
35
|
+
// x86 and x86-64 can perform unaligned loads/stores directly;
|
36
|
+
// modern PowerPC hardware can also do unaligned integer loads and stores;
|
37
|
+
// but note: the FPU still sends unaligned loads and stores to a trap handler!
|
38
|
+
|
39
|
+
// Direct loads: each macro reinterprets _p as a pointer to the fixed-width
// unsigned integer type and dereferences it in place (no memcpy).
// NOTE(review): this relies on the target tolerating misaligned pointers of
// these types -- confirm before adding a platform to the #if above.
#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
|
40
|
+
#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
|
41
|
+
#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
|
42
|
+
|
43
|
+
// Direct stores: assign _val through _p reinterpreted as a pointer to the
// fixed-width unsigned integer type.
#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
|
44
|
+
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
|
45
|
+
#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
|
46
|
+
|
47
|
+
#elif defined(__arm__) && \
|
48
|
+
!defined(__ARM_ARCH_5__) && \
|
49
|
+
!defined(__ARM_ARCH_5T__) && \
|
50
|
+
!defined(__ARM_ARCH_5TE__) && \
|
51
|
+
!defined(__ARM_ARCH_5TEJ__) && \
|
52
|
+
!defined(__ARM_ARCH_6__) && \
|
53
|
+
!defined(__ARM_ARCH_6J__) && \
|
54
|
+
!defined(__ARM_ARCH_6K__) && \
|
55
|
+
!defined(__ARM_ARCH_6Z__) && \
|
56
|
+
!defined(__ARM_ARCH_6ZK__) && \
|
57
|
+
!defined(__ARM_ARCH_6T2__) && \
|
58
|
+
!defined(__ARM_ARCH_7__) && \
|
59
|
+
!defined(__ARM_ARCH_7A__) && \
|
60
|
+
!defined(__ARM_ARCH_7M__) && \
|
61
|
+
!defined(__ARM_ARCH_7R__) && \
|
62
|
+
!defined(__ARM_ARCH_8__) && \
|
63
|
+
!defined(__ARM_ARCH_8A__)
|
64
|
+
|
65
|
+
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
|
66
|
+
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
|
67
|
+
// do an unaligned read and rotate the words around a bit, or do the reads very
|
68
|
+
// slowly (trip through kernel mode). There's no simple #define that says just
|
69
|
+
// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
|
70
|
+
// sub-architectures. Newer gcc (>= 4.6) sets an __ARM_FEATURE_UNALIGNED #define,
|
71
|
+
// so in time, maybe we can move on to that.
|
72
|
+
//
|
73
|
+
// Note that even if a chipset supports unaligned access, it might not be
|
74
|
+
// enabled in any given system, e.g.:
|
75
|
+
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0491c/CIHCGCFD.html
|
76
|
+
// Therefore, it's generally just not safe to allow unaligned access on any ARM
|
77
|
+
// variant.
|
78
|
+
//
|
79
|
+
// This is a mess, but there's not much we can do about it.
|
80
|
+
|
81
|
+
// 16- and 32-bit accessors for this branch: direct dereference of _p after a
// reinterpret_cast to the fixed-width unsigned integer pointer type. The
// 64-bit accessors of this branch are the memcpy-based inline functions that
// follow these macros.
#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
|
82
|
+
#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
|
83
|
+
|
84
|
+
#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
|
85
|
+
#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
|
86
|
+
|
87
|
+
// TODO(sesse): NEON supports unaligned 64-bit loads and stores.
|
88
|
+
// See if that would be more efficient on platforms supporting it,
|
89
|
+
// at least for copies.
|
90
|
+
|
91
|
+
// Reads a uint64 from p by copying sizeof(uint64) bytes into a local with
// memcpy, so p itself is never dereferenced as a uint64 pointer.
inline uint64 UNALIGNED_LOAD64(const void *p) {
  uint64 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
96
|
+
|
97
|
+
// Writes the bytes of v to p with memcpy, so p itself is never dereferenced
// as a uint64 pointer.
inline void UNALIGNED_STORE64(void *p, uint64 v) {
  memcpy(p, &v, sizeof(v));
}
|
100
|
+
|
101
|
+
#else
|
102
|
+
|
103
|
+
#define NEED_ALIGNED_LOADS
|
104
|
+
|
105
|
+
// These functions are provided for architectures that don't support
|
106
|
+
// unaligned loads and stores.
|
107
|
+
|
108
|
+
// Reads a uint16 from p by copying sizeof(uint16) bytes into a local with
// memcpy, so p itself is never dereferenced as a uint16 pointer.
inline uint16 UNALIGNED_LOAD16(const void *p) {
  uint16 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
113
|
+
|
114
|
+
// Reads a uint32 from p by copying sizeof(uint32) bytes into a local with
// memcpy, so p itself is never dereferenced as a uint32 pointer.
inline uint32 UNALIGNED_LOAD32(const void *p) {
  uint32 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
119
|
+
|
120
|
+
// Reads a uint64 from p by copying sizeof(uint64) bytes into a local with
// memcpy, so p itself is never dereferenced as a uint64 pointer.
inline uint64 UNALIGNED_LOAD64(const void *p) {
  uint64 value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
125
|
+
|
126
|
+
// Writes the bytes of v to p with memcpy, so p itself is never dereferenced
// as a uint16 pointer.
inline void UNALIGNED_STORE16(void *p, uint16 v) {
  memcpy(p, &v, sizeof(v));
}
|
129
|
+
|
130
|
+
// Writes the bytes of v to p with memcpy, so p itself is never dereferenced
// as a uint32 pointer.
inline void UNALIGNED_STORE32(void *p, uint32 v) {
  memcpy(p, &v, sizeof(v));
}
|
133
|
+
|
134
|
+
// Writes the bytes of v to p with memcpy, so p itself is never dereferenced
// as a uint64 pointer.
inline void UNALIGNED_STORE64(void *p, uint64 v) {
  memcpy(p, &v, sizeof(v));
}
|
137
|
+
|
138
|
+
#endif
|
139
|
+
|
140
|
+
} // End namespace CLD2
|
141
|
+
} // End namespace chrome_lang_id
|
142
|
+
|
143
|
+
#endif // SCRIPT_SPAN_PORT_H_
|
@@ -0,0 +1,28 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#include "registry.h"
|
17
|
+
|
18
|
+
namespace chrome_lang_id {
|
19
|
+
|
20
|
+
// Global list of all component registries.
|
21
|
+
RegistryMetadata *global_registry_list = NULL;
|
22
|
+
|
23
|
+
void RegistryMetadata::Register(RegistryMetadata *registry) {
|
24
|
+
registry->set_link(global_registry_list);
|
25
|
+
global_registry_list = registry;
|
26
|
+
}
|
27
|
+
|
28
|
+
} // namespace chrome_lang_id
|
data/ext/cld3/registry.h
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
/* Copyright 2016 Google Inc. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
// Registry for component registration. These classes can be used for creating
|
17
|
+
// registries of components conforming to the same interface. This is useful for
|
18
|
+
// making a component-based architecture where the specific implementation
|
19
|
+
// classes can be selected at runtime. There is support for both class-based and
|
20
|
+
// instance based registries.
|
21
|
+
//
|
22
|
+
// Example:
|
23
|
+
// function.h:
|
24
|
+
//
|
25
|
+
// class Function : public RegisterableInstance<Function> {
|
26
|
+
// public:
|
27
|
+
// virtual double Evaluate(double x) = 0;
|
28
|
+
// };
|
29
|
+
//
|
30
|
+
// #define REGISTER_FUNCTION(type, component)
|
31
|
+
// REGISTER_INSTANCE_COMPONENT(Function, type, component);
|
32
|
+
//
|
33
|
+
// function.cc:
|
34
|
+
//
|
35
|
+
// REGISTER_INSTANCE_REGISTRY("function", Function);
|
36
|
+
//
|
37
|
+
// class Cos : public Function {
|
38
|
+
// public:
|
39
|
+
// double Evaluate(double x) { return cos(x); }
|
40
|
+
// };
|
41
|
+
//
|
42
|
+
// class Exp : public Function {
|
43
|
+
// public:
|
44
|
+
// double Evaluate(double x) { return exp(x); }
|
45
|
+
// };
|
46
|
+
//
|
47
|
+
// REGISTER_FUNCTION("cos", Cos);
|
48
|
+
// REGISTER_FUNCTION("exp", Exp);
|
49
|
+
//
|
50
|
+
// Function *f = Function::Lookup("cos");
|
51
|
+
// double result = f->Evaluate(arg);
|
52
|
+
|
53
|
+
#ifndef REGISTRY_H_
|
54
|
+
#define REGISTRY_H_
|
55
|
+
|
56
|
+
#include <string.h>
|
57
|
+
|
58
|
+
#include <string>
|
59
|
+
|
60
|
+
#include "base.h"
|
61
|
+
|
62
|
+
namespace chrome_lang_id {
|
63
|
+
|
64
|
+
// Describes one registered component: its name, the class implementing it,
// and the source location of the registration. Each instance also carries a
// `link` pointer so that metadata objects can be chained into a singly linked
// list.
class ComponentMetadata {
 public:
  // Stores the given pointers as-is (the strings are not copied) and starts
  // out with no successor in the list.
  ComponentMetadata(const char *name, const char *class_name, const char *file,
                    int line)
      : name_(name),
        class_name_(class_name),
        file_(file),
        line_(line),
        link_(nullptr) {}

  // Read-only access to the values supplied at construction.
  const char *name() const { return name_; }
  const char *class_name() const { return class_name_; }
  const char *file() const { return file_; }
  int line() const { return line_; }

  // List linkage: the next metadata object, or null at the end of the list.
  ComponentMetadata *link() const { return link_; }
  void set_link(ComponentMetadata *link) { link_ = link; }

 private:
  // Name under which the component is registered.
  const char *name_;

  // Name of the class that implements the component.
  const char *class_name_;

  // Source file and line at which the component was registered.
  const char *file_;
  int line_;

  // Next metadata object in the list.
  ComponentMetadata *link_;
};
|
99
|
+
|
100
|
+
// The master registry contains all registered component registries. A registry
|
101
|
+
// is not registered in the master registry until the first component of that
|
102
|
+
// type is registered.
|
103
|
+
class RegistryMetadata : public ComponentMetadata {
|
104
|
+
public:
|
105
|
+
RegistryMetadata(const char *name, const char *class_name, const char *file,
|
106
|
+
int line)
|
107
|
+
: ComponentMetadata(name, class_name, file, line) {}
|
108
|
+
|
109
|
+
// Registers a component registry in the master registry.
|
110
|
+
static void Register(RegistryMetadata *registry);
|
111
|
+
};
|
112
|
+
|
113
|
+
// Registry for components. An object can be registered with a type name in the
|
114
|
+
// registry. The named instances in the registry can be returned using the
|
115
|
+
// Lookup() method. The components in the registry are put into a linked list
|
116
|
+
// of components. It is important that the component registry can be statically
|
117
|
+
// initialized in order not to depend on initialization order.
|
118
|
+
template <class T>
|
119
|
+
struct ComponentRegistry {
|
120
|
+
typedef ComponentRegistry<T> Self;
|
121
|
+
|
122
|
+
// Component registration class.
|
123
|
+
class Registrar : public ComponentMetadata {
|
124
|
+
public:
|
125
|
+
// Registers new component by linking itself into the component list of
|
126
|
+
// the registry.
|
127
|
+
Registrar(Self *registry, const char *type, const char *class_name,
|
128
|
+
const char *file, int line, T *object)
|
129
|
+
: ComponentMetadata(type, class_name, file, line), object_(object) {
|
130
|
+
// Register registry in master registry if this is the first registered
|
131
|
+
// component of this type.
|
132
|
+
if (registry->components == NULL) {
|
133
|
+
RegistryMetadata::Register(
|
134
|
+
new RegistryMetadata(registry->name, registry->class_name,
|
135
|
+
registry->file, registry->line));
|
136
|
+
}
|
137
|
+
|
138
|
+
// Register component in registry.
|
139
|
+
set_link(registry->components);
|
140
|
+
registry->components = this;
|
141
|
+
}
|
142
|
+
|
143
|
+
// Returns component type.
|
144
|
+
const char *type() const { return name(); }
|
145
|
+
|
146
|
+
// Returns component object.
|
147
|
+
T *object() const { return object_; }
|
148
|
+
|
149
|
+
// Returns the next component in the component list.
|
150
|
+
Registrar *next() const { return static_cast<Registrar *>(link()); }
|
151
|
+
|
152
|
+
private:
|
153
|
+
// Component object.
|
154
|
+
T *object_;
|
155
|
+
};
|
156
|
+
|
157
|
+
// Finds registrar for named component in registry.
|
158
|
+
const Registrar *GetComponent(const char *type) const {
|
159
|
+
Registrar *r = components;
|
160
|
+
while (r != NULL && strcmp(type, r->type()) != 0) r = r->next();
|
161
|
+
CLD3_DCHECK(r != nullptr);
|
162
|
+
|
163
|
+
return r;
|
164
|
+
}
|
165
|
+
|
166
|
+
// Finds a named component in the registry.
|
167
|
+
T *Lookup(const char *type) const { return GetComponent(type)->object(); }
|
168
|
+
T *Lookup(const string &type) const { return Lookup(type.c_str()); }
|
169
|
+
|
170
|
+
// Textual description of the kind of components in the registry.
|
171
|
+
const char *name;
|
172
|
+
|
173
|
+
// Base class name of component type.
|
174
|
+
const char *class_name;
|
175
|
+
|
176
|
+
// File and line where the registry is defined.
|
177
|
+
const char *file;
|
178
|
+
int line;
|
179
|
+
|
180
|
+
// Linked list of registered components.
|
181
|
+
Registrar *components;
|
182
|
+
};
|
183
|
+
|
184
|
+
// Base class for registerable class-based components.
|
185
|
+
template <class T>
|
186
|
+
class RegisterableClass {
|
187
|
+
public:
|
188
|
+
// Factory function type.
|
189
|
+
typedef T *(Factory)();
|
190
|
+
|
191
|
+
// Registry type.
|
192
|
+
typedef ComponentRegistry<Factory> Registry;
|
193
|
+
|
194
|
+
// Should be called before any call to Create() or registry(), i.e., before
|
195
|
+
// using the registration mechanism to register and or instantiate subclasses
|
196
|
+
// of T.
|
197
|
+
static void CreateRegistry(
|
198
|
+
const char *name,
|
199
|
+
const char *class_name,
|
200
|
+
const char *file,
|
201
|
+
int line) {
|
202
|
+
registry_ = new Registry();
|
203
|
+
registry_->name = name;
|
204
|
+
registry_->class_name = class_name;
|
205
|
+
registry_->file = file;
|
206
|
+
registry_->line = line;
|
207
|
+
registry_->components = nullptr;
|
208
|
+
}
|
209
|
+
|
210
|
+
// Should be called when one is done using the registration mechanism for
|
211
|
+
// class T.
|
212
|
+
static void DeleteRegistry() {
|
213
|
+
delete registry_;
|
214
|
+
registry_ = nullptr;
|
215
|
+
}
|
216
|
+
|
217
|
+
// Creates a new component instance.
|
218
|
+
static T *Create(const string &type) { return registry()->Lookup(type)(); }
|
219
|
+
|
220
|
+
// Returns registry for class.
|
221
|
+
static Registry *registry() { return registry_; }
|
222
|
+
|
223
|
+
private:
|
224
|
+
// Registry for class.
|
225
|
+
static Registry *registry_;
|
226
|
+
};
|
227
|
+
|
228
|
+
// Base class for registerable instance-based components.
|
229
|
+
template <class T>
|
230
|
+
class RegisterableInstance {
|
231
|
+
public:
|
232
|
+
// Registry type.
|
233
|
+
typedef ComponentRegistry<T> Registry;
|
234
|
+
|
235
|
+
private:
|
236
|
+
// Registry for class.
|
237
|
+
static Registry registry_;
|
238
|
+
};
|
239
|
+
|
240
|
+
} // namespace chrome_lang_id
|
241
|
+
|
242
|
+
#endif // REGISTRY_H_
|