OpenCC 1.2.0__cp38-cp38-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opencc/__init__.py +49 -0
- opencc/clib/__init__.py +0 -0
- opencc/clib/bin/opencc +0 -0
- opencc/clib/bin/opencc_dict +0 -0
- opencc/clib/bin/opencc_phrase_extract +0 -0
- opencc/clib/include/opencc/BinaryDict.hpp +53 -0
- opencc/clib/include/opencc/Common.hpp +82 -0
- opencc/clib/include/opencc/Config.hpp +49 -0
- opencc/clib/include/opencc/Conversion.hpp +47 -0
- opencc/clib/include/opencc/ConversionChain.hpp +43 -0
- opencc/clib/include/opencc/Converter.hpp +51 -0
- opencc/clib/include/opencc/DartsDict.hpp +60 -0
- opencc/clib/include/opencc/Dict.hpp +92 -0
- opencc/clib/include/opencc/DictConverter.hpp +32 -0
- opencc/clib/include/opencc/DictEntry.hpp +173 -0
- opencc/clib/include/opencc/DictGroup.hpp +57 -0
- opencc/clib/include/opencc/Exception.hpp +88 -0
- opencc/clib/include/opencc/Export.hpp +40 -0
- opencc/clib/include/opencc/Lexicon.hpp +70 -0
- opencc/clib/include/opencc/MarisaDict.hpp +63 -0
- opencc/clib/include/opencc/MaxMatchSegmentation.hpp +43 -0
- opencc/clib/include/opencc/Optional.hpp +76 -0
- opencc/clib/include/opencc/PhraseExtract.hpp +195 -0
- opencc/clib/include/opencc/Segmentation.hpp +32 -0
- opencc/clib/include/opencc/Segments.hpp +118 -0
- opencc/clib/include/opencc/SerializableDict.hpp +77 -0
- opencc/clib/include/opencc/SerializedValues.hpp +52 -0
- opencc/clib/include/opencc/SimpleConverter.hpp +113 -0
- opencc/clib/include/opencc/TextDict.hpp +60 -0
- opencc/clib/include/opencc/UTF8StringSlice.hpp +246 -0
- opencc/clib/include/opencc/UTF8Util.hpp +291 -0
- opencc/clib/include/opencc/opencc.h +161 -0
- opencc/clib/include/opencc/opencc_config.h +21 -0
- opencc/clib/lib/cmake/opencc/OpenCCConfig.cmake +31 -0
- opencc/clib/lib/cmake/opencc/OpenCCConfigVersion.cmake +65 -0
- opencc/clib/lib/cmake/opencc/OpenCCTargets-release.cmake +29 -0
- opencc/clib/lib/cmake/opencc/OpenCCTargets.cmake +110 -0
- opencc/clib/lib/libmarisa.a +0 -0
- opencc/clib/lib/libopencc.a +0 -0
- opencc/clib/lib/pkgconfig/opencc.pc +11 -0
- opencc/clib/opencc_clib.cpython-38-x86_64-linux-gnu.so +0 -0
- opencc/clib/share/opencc/HKVariants.ocd2 +0 -0
- opencc/clib/share/opencc/HKVariantsRev.ocd2 +0 -0
- opencc/clib/share/opencc/HKVariantsRevPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/JPShinjitaiCharacters.ocd2 +0 -0
- opencc/clib/share/opencc/JPShinjitaiPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/JPVariants.ocd2 +0 -0
- opencc/clib/share/opencc/JPVariantsRev.ocd2 +0 -0
- opencc/clib/share/opencc/STCharacters.ocd2 +0 -0
- opencc/clib/share/opencc/STPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/TSCharacters.ocd2 +0 -0
- opencc/clib/share/opencc/TSPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/TWPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/TWPhrasesRev.ocd2 +0 -0
- opencc/clib/share/opencc/TWVariants.ocd2 +0 -0
- opencc/clib/share/opencc/TWVariantsRev.ocd2 +0 -0
- opencc/clib/share/opencc/TWVariantsRevPhrases.ocd2 +0 -0
- opencc/clib/share/opencc/hk2s.json +33 -0
- opencc/clib/share/opencc/hk2t.json +22 -0
- opencc/clib/share/opencc/jp2t.json +25 -0
- opencc/clib/share/opencc/s2hk.json +27 -0
- opencc/clib/share/opencc/s2t.json +22 -0
- opencc/clib/share/opencc/s2tw.json +27 -0
- opencc/clib/share/opencc/s2twp.json +32 -0
- opencc/clib/share/opencc/t2hk.json +16 -0
- opencc/clib/share/opencc/t2jp.json +16 -0
- opencc/clib/share/opencc/t2s.json +22 -0
- opencc/clib/share/opencc/t2tw.json +16 -0
- opencc/clib/share/opencc/tw2s.json +33 -0
- opencc/clib/share/opencc/tw2sp.json +36 -0
- opencc/clib/share/opencc/tw2t.json +22 -0
- opencc/py.typed +0 -0
- opencc-1.2.0.dist-info/AUTHORS +12 -0
- opencc-1.2.0.dist-info/LICENSE +56 -0
- opencc-1.2.0.dist-info/METADATA +347 -0
- opencc-1.2.0.dist-info/RECORD +78 -0
- opencc-1.2.0.dist-info/WHEEL +5 -0
- opencc-1.2.0.dist-info/top_level.txt +1 -0
opencc/__init__.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
import opencc_clib
|
|
5
|
+
except ImportError:
|
|
6
|
+
from opencc.clib import opencc_clib
|
|
7
|
+
|
|
8
|
+
__all__ = ['CONFIGS', 'OpenCC', '__version__']
|
|
9
|
+
|
|
10
|
+
__version__ = opencc_clib.__version__
|
|
11
|
+
# Directory containing this module; every lookup below is relative to it.
_this_dir = os.path.dirname(os.path.abspath(__file__))
# Primary location: data files stored under this package's clib/share/opencc.
_opencc_share_dir = os.path.join(_this_dir, 'clib', 'share', 'opencc')
# Fallback location: <two levels above this module>/data/config.
_opencc_rootdir = os.path.abspath(os.path.join(_this_dir, '..', '..'))
_opencc_configdir = os.path.join(_opencc_rootdir, 'data', 'config')

# CONFIGS lists the '*.json' file names found in the first of the two
# directories that exists, or is empty when neither exists.  The names are
# sorted because os.listdir() returns entries in arbitrary order, which would
# otherwise make this public list differ between filesystems.
if os.path.isdir(_opencc_share_dir):
    CONFIGS = sorted(
        f for f in os.listdir(_opencc_share_dir) if f.endswith('.json')
    )
elif os.path.isdir(_opencc_configdir):
    CONFIGS = sorted(
        f for f in os.listdir(_opencc_configdir) if f.endswith('.json')
    )
else:
    CONFIGS = []
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _append_path_to_env(name: str, path: str) -> None:
|
|
25
|
+
value = os.environ.get(name, '')
|
|
26
|
+
if path in value: # Path already exists
|
|
27
|
+
return
|
|
28
|
+
if value == '':
|
|
29
|
+
value = path
|
|
30
|
+
else:
|
|
31
|
+
value += f':{path}'
|
|
32
|
+
os.environ[name] = value
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class OpenCC(opencc_clib._OpenCC):
    """Python wrapper around the native ``opencc_clib._OpenCC`` converter."""

    def __init__(self, config: str = 't2s') -> None:
        """Create a converter for *config*.

        ``.json`` is appended when *config* does not already end with it.
        If the resulting path is not an existing file, the file of the same
        name under the package share directory is used when that one exists.
        The path finally handed to the native constructor is stored in
        ``self.config``.
        """
        config_path = config if config.endswith('.json') else config + '.json'
        bundled_path = os.path.join(_opencc_share_dir, config_path)
        # A file at the given path wins; the bundled copy is only a fallback.
        if not os.path.isfile(config_path) and os.path.isfile(bundled_path):
            config_path = bundled_path
        super().__init__(config_path)
        self.config = config_path

    def convert(self, text: str):
        """Convert *text* with the native converter.

        The text is encoded as UTF-8 and passed to the native ``convert``
        together with its length in bytes; the native result is returned
        unchanged.
        """
        encoded = text.encode('utf-8')
        return super().convert(encoded, len(encoded))
|
opencc/clib/__init__.py
ADDED
|
File without changes
|
opencc/clib/bin/opencc
ADDED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "SerializableDict.hpp"
|
|
23
|
+
|
|
24
|
+
namespace opencc {
|
|
25
|
+
/**
|
|
26
|
+
* Binary dictionary for faster deserialization
|
|
27
|
+
* @ingroup opencc_cpp_api
|
|
28
|
+
*/
|
|
29
|
+
class OPENCC_EXPORT BinaryDict : public SerializableDict {
|
|
30
|
+
public:
|
|
31
|
+
BinaryDict(const LexiconPtr& _lexicon) : lexicon(_lexicon) {}
|
|
32
|
+
|
|
33
|
+
virtual ~BinaryDict() {}
|
|
34
|
+
|
|
35
|
+
virtual void SerializeToFile(FILE* fp) const;
|
|
36
|
+
|
|
37
|
+
static BinaryDictPtr NewFromFile(FILE* fp);
|
|
38
|
+
|
|
39
|
+
const LexiconPtr& GetLexicon() const { return lexicon; }
|
|
40
|
+
|
|
41
|
+
size_t KeyMaxLength() const;
|
|
42
|
+
|
|
43
|
+
private:
|
|
44
|
+
LexiconPtr lexicon;
|
|
45
|
+
std::string keyBuffer;
|
|
46
|
+
std::string valueBuffer;
|
|
47
|
+
|
|
48
|
+
void ConstructBuffer(std::string& keyBuffer, std::vector<size_t>& keyOffset,
|
|
49
|
+
size_t& keyTotalLength, std::string& valueBuffer,
|
|
50
|
+
std::vector<size_t>& valueOffset,
|
|
51
|
+
size_t& valueTotalLength) const;
|
|
52
|
+
};
|
|
53
|
+
} // namespace opencc
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
// Microsoft Visual C++ specific
|
|
22
|
+
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
|
|
23
|
+
#pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820)
|
|
24
|
+
#endif
|
|
25
|
+
|
|
26
|
+
#include <cstddef>
|
|
27
|
+
#include <memory>
|
|
28
|
+
#include <string>
|
|
29
|
+
#include <vector>
|
|
30
|
+
|
|
31
|
+
#include "Export.hpp"
|
|
32
|
+
#include "Optional.hpp"
|
|
33
|
+
#include "opencc_config.h"
|
|
34
|
+
|
|
35
|
+
// Forward declarations and shared-pointer aliases used across the headers.
namespace opencc {
class Config;
class Conversion;
class ConversionChain;
class Converter;
class Dict;
class DictEntry;
class DictGroup;
class Lexicon;
class MarisaDict;
class MultiValueDictEntry;
class NoValueDictEntry;
class Segmentation;
class Segments;
class SerializableDict;
class SingleValueDictEntry;
class TextDict;

using ConversionPtr = std::shared_ptr<Conversion>;
using ConversionChainPtr = std::shared_ptr<ConversionChain>;
using ConverterPtr = std::shared_ptr<Converter>;
using DictPtr = std::shared_ptr<Dict>;
using DictGroupPtr = std::shared_ptr<DictGroup>;
using LexiconPtr = std::shared_ptr<Lexicon>;
using MarisaDictPtr = std::shared_ptr<MarisaDict>;
using SegmentationPtr = std::shared_ptr<Segmentation>;
using SegmentsPtr = std::shared_ptr<Segments>;
using SerializableDictPtr = std::shared_ptr<SerializableDict>;
using TextDictPtr = std::shared_ptr<TextDict>;

// These names are only declared when OPENCC_ENABLE_DARTS is defined.
#ifdef OPENCC_ENABLE_DARTS
class BinaryDict;
class DartsDict;
using BinaryDictPtr = std::shared_ptr<BinaryDict>;
using DartsDictPtr = std::shared_ptr<DartsDict>;
#endif

} // namespace opencc
|
|
73
|
+
|
|
74
|
+
// Data directory prefix: PKGDATADIR followed by "/" when the build defines
// PKGDATADIR, otherwise an empty string.
#ifdef PKGDATADIR
const std::string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
#else  // ifdef PKGDATADIR
const std::string PACKAGE_DATA_DIRECTORY = "";
#endif // ifdef PKGDATADIR

// Fallback version string used only when the build does not define VERSION.
#if !defined(VERSION)
#define VERSION "1.0.*"
#endif // !defined(VERSION)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
|
|
23
|
+
namespace opencc {
|
|
24
|
+
/**
|
|
25
|
+
* Configuration loader
|
|
26
|
+
* @ingroup opencc_cpp_api
|
|
27
|
+
*/
|
|
28
|
+
class OPENCC_EXPORT Config {
|
|
29
|
+
public:
|
|
30
|
+
Config();
|
|
31
|
+
|
|
32
|
+
virtual ~Config();
|
|
33
|
+
|
|
34
|
+
ConverterPtr NewFromString(const std::string& json,
|
|
35
|
+
const std::string& configDirectory);
|
|
36
|
+
|
|
37
|
+
ConverterPtr NewFromString(const std::string& json,
|
|
38
|
+
const std::vector<std::string>& paths);
|
|
39
|
+
|
|
40
|
+
ConverterPtr NewFromFile(const std::string& fileName);
|
|
41
|
+
|
|
42
|
+
ConverterPtr NewFromFile(const std::string& fileName,
|
|
43
|
+
const std::vector<std::string>& paths,
|
|
44
|
+
const char* argv0);
|
|
45
|
+
|
|
46
|
+
private:
|
|
47
|
+
void* internal;
|
|
48
|
+
};
|
|
49
|
+
} // namespace opencc
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "Segmentation.hpp"
|
|
23
|
+
|
|
24
|
+
namespace opencc {
|
|
25
|
+
/**
|
|
26
|
+
* Conversion interface
|
|
27
|
+
* @ingroup opencc_cpp_api
|
|
28
|
+
*/
|
|
29
|
+
class OPENCC_EXPORT Conversion {
|
|
30
|
+
public:
|
|
31
|
+
Conversion(DictPtr _dict) : dict(_dict) {}
|
|
32
|
+
|
|
33
|
+
// Convert single phrase
|
|
34
|
+
std::string Convert(const std::string& phrase) const;
|
|
35
|
+
|
|
36
|
+
// Convert single phrase
|
|
37
|
+
std::string Convert(const char* phrase) const;
|
|
38
|
+
|
|
39
|
+
// Convert segmented text
|
|
40
|
+
SegmentsPtr Convert(const SegmentsPtr& input) const;
|
|
41
|
+
|
|
42
|
+
const DictPtr GetDict() const { return dict; }
|
|
43
|
+
|
|
44
|
+
private:
|
|
45
|
+
const DictPtr dict;
|
|
46
|
+
};
|
|
47
|
+
} // namespace opencc
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include <list>
|
|
22
|
+
|
|
23
|
+
#include "Common.hpp"
|
|
24
|
+
#include "Conversion.hpp"
|
|
25
|
+
|
|
26
|
+
namespace opencc {
|
|
27
|
+
/**
|
|
28
|
+
* Chain of conversions
|
|
29
|
+
* Consists of a list of conversions. Converts input in sequence.
|
|
30
|
+
* @ingroup opencc_cpp_api
|
|
31
|
+
*/
|
|
32
|
+
class OPENCC_EXPORT ConversionChain {
|
|
33
|
+
public:
|
|
34
|
+
ConversionChain(const std::list<ConversionPtr> _conversions);
|
|
35
|
+
|
|
36
|
+
SegmentsPtr Convert(const SegmentsPtr& input) const;
|
|
37
|
+
|
|
38
|
+
const std::list<ConversionPtr> GetConversions() const { return conversions; }
|
|
39
|
+
|
|
40
|
+
private:
|
|
41
|
+
const std::list<ConversionPtr> conversions;
|
|
42
|
+
};
|
|
43
|
+
} // namespace opencc
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "Segmentation.hpp"
|
|
23
|
+
|
|
24
|
+
namespace opencc {
|
|
25
|
+
/**
|
|
26
|
+
* Controller of segmentation and conversion
|
|
27
|
+
* @ingroup opencc_cpp_api
|
|
28
|
+
*/
|
|
29
|
+
class OPENCC_EXPORT Converter {
|
|
30
|
+
public:
|
|
31
|
+
Converter(const std::string& _name, SegmentationPtr _segmentation,
|
|
32
|
+
ConversionChainPtr _conversionChain)
|
|
33
|
+
: name(_name), segmentation(_segmentation),
|
|
34
|
+
conversionChain(_conversionChain) {}
|
|
35
|
+
|
|
36
|
+
std::string Convert(const std::string& text) const;
|
|
37
|
+
|
|
38
|
+
size_t Convert(const char* input, char* output) const;
|
|
39
|
+
|
|
40
|
+
const SegmentationPtr GetSegmentation() const { return segmentation; }
|
|
41
|
+
|
|
42
|
+
const ConversionChainPtr GetConversionChain() const {
|
|
43
|
+
return conversionChain;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
private:
|
|
47
|
+
const std::string name;
|
|
48
|
+
const SegmentationPtr segmentation;
|
|
49
|
+
const ConversionChainPtr conversionChain;
|
|
50
|
+
};
|
|
51
|
+
} // namespace opencc
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "SerializableDict.hpp"
|
|
23
|
+
|
|
24
|
+
namespace opencc {
|
|
25
|
+
/**
|
|
26
|
+
* Darts dictionary
|
|
27
|
+
* @ingroup opencc_cpp_api
|
|
28
|
+
*/
|
|
29
|
+
class OPENCC_EXPORT DartsDict : public Dict, public SerializableDict {
|
|
30
|
+
public:
|
|
31
|
+
virtual ~DartsDict();
|
|
32
|
+
|
|
33
|
+
virtual size_t KeyMaxLength() const;
|
|
34
|
+
|
|
35
|
+
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
|
|
36
|
+
|
|
37
|
+
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
|
|
38
|
+
size_t len) const;
|
|
39
|
+
|
|
40
|
+
virtual LexiconPtr GetLexicon() const;
|
|
41
|
+
|
|
42
|
+
virtual void SerializeToFile(FILE* fp) const;
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Constructs a DartsDict from another dictionary.
|
|
46
|
+
*/
|
|
47
|
+
static DartsDictPtr NewFromDict(const Dict& thatDict);
|
|
48
|
+
|
|
49
|
+
static DartsDictPtr NewFromFile(FILE* fp);
|
|
50
|
+
|
|
51
|
+
private:
|
|
52
|
+
DartsDict();
|
|
53
|
+
|
|
54
|
+
size_t maxLength;
|
|
55
|
+
LexiconPtr lexicon;
|
|
56
|
+
|
|
57
|
+
class DartsInternal;
|
|
58
|
+
DartsInternal* internal;
|
|
59
|
+
};
|
|
60
|
+
} // namespace opencc
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
#include "DictEntry.hpp"
|
|
23
|
+
|
|
24
|
+
namespace opencc {
|
|
25
|
+
/**
|
|
26
|
+
* Abstract class of dictionary
|
|
27
|
+
* @ingroup opencc_cpp_api
|
|
28
|
+
*/
|
|
29
|
+
class OPENCC_EXPORT Dict {
|
|
30
|
+
public:
|
|
31
|
+
/**
|
|
32
|
+
* Matches a word exactly and returns the DictEntry or Optional::Null().
|
|
33
|
+
*/
|
|
34
|
+
virtual Optional<const DictEntry*> Match(const char* word,
|
|
35
|
+
size_t len) const = 0;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Matches a word exactly and returns the DictEntry or Optional::Null().
|
|
39
|
+
*/
|
|
40
|
+
Optional<const DictEntry*> Match(const std::string& word) const {
|
|
41
|
+
return Match(word.c_str(), word.length());
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Matches the longest matched prefix of a word.
|
|
46
|
+
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
|
|
47
|
+
* the longest prefix of "banana" matched is "bana".
|
|
48
|
+
*/
|
|
49
|
+
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
|
|
50
|
+
size_t len) const;
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Matches the longest matched prefix of a word.
|
|
54
|
+
*/
|
|
55
|
+
Optional<const DictEntry*> MatchPrefix(const char* word) const {
|
|
56
|
+
return MatchPrefix(word, KeyMaxLength());
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Matches the longest matched prefix of a word.
|
|
61
|
+
*/
|
|
62
|
+
Optional<const DictEntry*> MatchPrefix(const std::string& word) const {
|
|
63
|
+
return MatchPrefix(word.c_str(), word.length());
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Returns all matched prefixes of a word, sorted by the length (desc).
|
|
68
|
+
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
|
|
69
|
+
* all the matched prefixes of "banana" are "bana", "ban", "ba", "b".
|
|
70
|
+
*/
|
|
71
|
+
virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
|
|
72
|
+
size_t len) const;
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Returns all matched prefixes of a word, sorted by the length (desc).
|
|
76
|
+
*/
|
|
77
|
+
std::vector<const DictEntry*>
|
|
78
|
+
MatchAllPrefixes(const std::string& word) const {
|
|
79
|
+
return MatchAllPrefixes(word.c_str(), word.length());
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Returns the length of the longest key in the dictionary.
|
|
84
|
+
*/
|
|
85
|
+
virtual size_t KeyMaxLength() const = 0;
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Returns all entries in the dictionary.
|
|
89
|
+
*/
|
|
90
|
+
virtual LexiconPtr GetLexicon() const = 0;
|
|
91
|
+
};
|
|
92
|
+
} // namespace opencc
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Open Chinese Convert
|
|
3
|
+
*
|
|
4
|
+
* Copyright 2010-2017 Carbo Kuo <byvoid@byvoid.com>
|
|
5
|
+
*
|
|
6
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
* you may not use this file except in compliance with the License.
|
|
8
|
+
* You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
* See the License for the specific language governing permissions and
|
|
16
|
+
* limitations under the License.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
#pragma once
|
|
20
|
+
|
|
21
|
+
#include "Common.hpp"
|
|
22
|
+
|
|
23
|
+
namespace opencc {
|
|
24
|
+
/**
|
|
25
|
+
* Converts a dictionary from a format to another.
|
|
26
|
+
* @ingroup opencc_cpp_api
|
|
27
|
+
*/
|
|
28
|
+
OPENCC_EXPORT void ConvertDictionary(const std::string& inputFileName,
|
|
29
|
+
const std::string& outputFileName,
|
|
30
|
+
const std::string& formatFrom,
|
|
31
|
+
const std::string& formatTo);
|
|
32
|
+
} // namespace opencc
|