OpenCC 1.2.0__cp38-cp38-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. opencc/__init__.py +49 -0
  2. opencc/clib/__init__.py +0 -0
  3. opencc/clib/bin/opencc +0 -0
  4. opencc/clib/bin/opencc_dict +0 -0
  5. opencc/clib/bin/opencc_phrase_extract +0 -0
  6. opencc/clib/include/opencc/BinaryDict.hpp +53 -0
  7. opencc/clib/include/opencc/Common.hpp +82 -0
  8. opencc/clib/include/opencc/Config.hpp +49 -0
  9. opencc/clib/include/opencc/Conversion.hpp +47 -0
  10. opencc/clib/include/opencc/ConversionChain.hpp +43 -0
  11. opencc/clib/include/opencc/Converter.hpp +51 -0
  12. opencc/clib/include/opencc/DartsDict.hpp +60 -0
  13. opencc/clib/include/opencc/Dict.hpp +92 -0
  14. opencc/clib/include/opencc/DictConverter.hpp +32 -0
  15. opencc/clib/include/opencc/DictEntry.hpp +173 -0
  16. opencc/clib/include/opencc/DictGroup.hpp +57 -0
  17. opencc/clib/include/opencc/Exception.hpp +88 -0
  18. opencc/clib/include/opencc/Export.hpp +40 -0
  19. opencc/clib/include/opencc/Lexicon.hpp +70 -0
  20. opencc/clib/include/opencc/MarisaDict.hpp +63 -0
  21. opencc/clib/include/opencc/MaxMatchSegmentation.hpp +43 -0
  22. opencc/clib/include/opencc/Optional.hpp +76 -0
  23. opencc/clib/include/opencc/PhraseExtract.hpp +195 -0
  24. opencc/clib/include/opencc/Segmentation.hpp +32 -0
  25. opencc/clib/include/opencc/Segments.hpp +118 -0
  26. opencc/clib/include/opencc/SerializableDict.hpp +77 -0
  27. opencc/clib/include/opencc/SerializedValues.hpp +52 -0
  28. opencc/clib/include/opencc/SimpleConverter.hpp +113 -0
  29. opencc/clib/include/opencc/TextDict.hpp +60 -0
  30. opencc/clib/include/opencc/UTF8StringSlice.hpp +246 -0
  31. opencc/clib/include/opencc/UTF8Util.hpp +291 -0
  32. opencc/clib/include/opencc/opencc.h +161 -0
  33. opencc/clib/include/opencc/opencc_config.h +21 -0
  34. opencc/clib/lib/cmake/opencc/OpenCCConfig.cmake +31 -0
  35. opencc/clib/lib/cmake/opencc/OpenCCConfigVersion.cmake +65 -0
  36. opencc/clib/lib/cmake/opencc/OpenCCTargets-release.cmake +29 -0
  37. opencc/clib/lib/cmake/opencc/OpenCCTargets.cmake +110 -0
  38. opencc/clib/lib/libmarisa.a +0 -0
  39. opencc/clib/lib/libopencc.a +0 -0
  40. opencc/clib/lib/pkgconfig/opencc.pc +11 -0
  41. opencc/clib/opencc_clib.cpython-38-x86_64-linux-gnu.so +0 -0
  42. opencc/clib/share/opencc/HKVariants.ocd2 +0 -0
  43. opencc/clib/share/opencc/HKVariantsRev.ocd2 +0 -0
  44. opencc/clib/share/opencc/HKVariantsRevPhrases.ocd2 +0 -0
  45. opencc/clib/share/opencc/JPShinjitaiCharacters.ocd2 +0 -0
  46. opencc/clib/share/opencc/JPShinjitaiPhrases.ocd2 +0 -0
  47. opencc/clib/share/opencc/JPVariants.ocd2 +0 -0
  48. opencc/clib/share/opencc/JPVariantsRev.ocd2 +0 -0
  49. opencc/clib/share/opencc/STCharacters.ocd2 +0 -0
  50. opencc/clib/share/opencc/STPhrases.ocd2 +0 -0
  51. opencc/clib/share/opencc/TSCharacters.ocd2 +0 -0
  52. opencc/clib/share/opencc/TSPhrases.ocd2 +0 -0
  53. opencc/clib/share/opencc/TWPhrases.ocd2 +0 -0
  54. opencc/clib/share/opencc/TWPhrasesRev.ocd2 +0 -0
  55. opencc/clib/share/opencc/TWVariants.ocd2 +0 -0
  56. opencc/clib/share/opencc/TWVariantsRev.ocd2 +0 -0
  57. opencc/clib/share/opencc/TWVariantsRevPhrases.ocd2 +0 -0
  58. opencc/clib/share/opencc/hk2s.json +33 -0
  59. opencc/clib/share/opencc/hk2t.json +22 -0
  60. opencc/clib/share/opencc/jp2t.json +25 -0
  61. opencc/clib/share/opencc/s2hk.json +27 -0
  62. opencc/clib/share/opencc/s2t.json +22 -0
  63. opencc/clib/share/opencc/s2tw.json +27 -0
  64. opencc/clib/share/opencc/s2twp.json +32 -0
  65. opencc/clib/share/opencc/t2hk.json +16 -0
  66. opencc/clib/share/opencc/t2jp.json +16 -0
  67. opencc/clib/share/opencc/t2s.json +22 -0
  68. opencc/clib/share/opencc/t2tw.json +16 -0
  69. opencc/clib/share/opencc/tw2s.json +33 -0
  70. opencc/clib/share/opencc/tw2sp.json +36 -0
  71. opencc/clib/share/opencc/tw2t.json +22 -0
  72. opencc/py.typed +0 -0
  73. opencc-1.2.0.dist-info/AUTHORS +12 -0
  74. opencc-1.2.0.dist-info/LICENSE +56 -0
  75. opencc-1.2.0.dist-info/METADATA +347 -0
  76. opencc-1.2.0.dist-info/RECORD +78 -0
  77. opencc-1.2.0.dist-info/WHEEL +5 -0
  78. opencc-1.2.0.dist-info/top_level.txt +1 -0
opencc/__init__.py ADDED
@@ -0,0 +1,49 @@
1
+ import os
2
+
3
+ try:
4
+ import opencc_clib
5
+ except ImportError:
6
+ from opencc.clib import opencc_clib
7
+
8
# Names exported by `from opencc import *`.
__all__ = ['CONFIGS', 'OpenCC', '__version__']

# The package version is whatever the native extension module reports.
__version__ = opencc_clib.__version__

# Directory holding this file, and the data directory bundled beneath it.
_this_dir = os.path.dirname(os.path.abspath(__file__))
_opencc_share_dir = os.path.join(_this_dir, 'clib', 'share', 'opencc')

# Fallback location: <two levels above this package>/data/config.
_opencc_rootdir = os.path.abspath(os.path.join(_this_dir, '..', '..'))
_opencc_configdir = os.path.join(_opencc_rootdir, 'data', 'config')

# CONFIGS holds the '.json' file names found in the first of the two
# directories that exists (bundled share dir first, then the fallback config
# dir); it is empty when neither directory is present.
CONFIGS = (
    [entry for entry in os.listdir(_opencc_share_dir)
     if entry.endswith('.json')]
    if os.path.isdir(_opencc_share_dir)
    else (
        [entry for entry in os.listdir(_opencc_configdir)
         if entry.endswith('.json')]
        if os.path.isdir(_opencc_configdir)
        else []
    )
)
22
+
23
+
24
+ def _append_path_to_env(name: str, path: str) -> None:
25
+ value = os.environ.get(name, '')
26
+ if path in value: # Path already exists
27
+ return
28
+ if value == '':
29
+ value = path
30
+ else:
31
+ value += f':{path}'
32
+ os.environ[name] = value
33
+
34
+
35
class OpenCC(opencc_clib._OpenCC):
    """Python-facing converter built on the native ``_OpenCC`` class."""

    def __init__(self, config: str = 't2s') -> None:
        """Create a converter for the given configuration.

        ``config`` gets a '.json' suffix if it lacks one. If the resulting
        name is not an existing file, the file of the same name inside the
        bundled share directory is used when it exists; otherwise the name
        is handed to the native constructor unchanged.
        """
        resolved = config if config.endswith('.json') else config + '.json'
        if not os.path.isfile(resolved):
            bundled = os.path.join(_opencc_share_dir, resolved)
            if os.path.isfile(bundled):
                resolved = bundled
        super().__init__(resolved)
        # Remember the configuration path that was actually passed down.
        self.config = resolved

    def convert(self, text: str):
        """Encode ``text`` as UTF-8 and pass the bytes and their length to
        the native ``convert``; its result is returned as-is."""
        encoded = text.encode('utf-8')
        return super().convert(encoded, len(encoded))
File without changes
opencc/clib/bin/opencc ADDED
Binary file
Binary file
Binary file
@@ -0,0 +1,53 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+ #include "SerializableDict.hpp"
23
+
24
+ namespace opencc {
25
+ /**
26
+ * Binary dictionary for faster deserialization
27
+ * @ingroup opencc_cpp_api
28
+ */
29
+ class OPENCC_EXPORT BinaryDict : public SerializableDict {
30
+ public:
31
+ BinaryDict(const LexiconPtr& _lexicon) : lexicon(_lexicon) {}
32
+
33
+ virtual ~BinaryDict() {}
34
+
35
+ virtual void SerializeToFile(FILE* fp) const;
36
+
37
+ static BinaryDictPtr NewFromFile(FILE* fp);
38
+
39
+ const LexiconPtr& GetLexicon() const { return lexicon; }
40
+
41
+ size_t KeyMaxLength() const;
42
+
43
+ private:
44
+ LexiconPtr lexicon;
45
+ std::string keyBuffer;
46
+ std::string valueBuffer;
47
+
48
+ void ConstructBuffer(std::string& keyBuffer, std::vector<size_t>& keyOffset,
49
+ size_t& keyTotalLength, std::string& valueBuffer,
50
+ std::vector<size_t>& valueOffset,
51
+ size_t& valueTotalLength) const;
52
+ };
53
+ } // namespace opencc
@@ -0,0 +1,82 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ // Microsoft Visual C++ specific
22
+ #if defined(_MSC_VER) && (_MSC_VER >= 1020)
23
+ #pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820)
24
+ #endif
25
+
26
+ #include <cstddef>
27
+ #include <memory>
28
+ #include <string>
29
+ #include <vector>
30
+
31
+ #include "Export.hpp"
32
+ #include "Optional.hpp"
33
+ #include "opencc_config.h"
34
+
35
+ // Forward declarations and aliases
36
namespace opencc {

// Classes that are only named here; their definitions live in other headers.
class Config;
class Conversion;
class ConversionChain;
class Converter;
class Dict;
class DictEntry;
class DictGroup;
class Lexicon;
class MarisaDict;
class MultiValueDictEntry;
class NoValueDictEntry;
class Segmentation;
class Segments;
class SerializableDict;
class SingleValueDictEntry;
class TextDict;

// Shared-ownership handle aliases for the classes above.
using ConversionPtr = std::shared_ptr<Conversion>;
using ConversionChainPtr = std::shared_ptr<ConversionChain>;
using ConverterPtr = std::shared_ptr<Converter>;
using DictPtr = std::shared_ptr<Dict>;
using DictGroupPtr = std::shared_ptr<DictGroup>;
using LexiconPtr = std::shared_ptr<Lexicon>;
using MarisaDictPtr = std::shared_ptr<MarisaDict>;
using SegmentationPtr = std::shared_ptr<Segmentation>;
using SegmentsPtr = std::shared_ptr<Segments>;
using SerializableDictPtr = std::shared_ptr<SerializableDict>;
using TextDictPtr = std::shared_ptr<TextDict>;

// These names exist only when OPENCC_ENABLE_DARTS is defined.
#ifdef OPENCC_ENABLE_DARTS
class BinaryDict;
class DartsDict;
using BinaryDictPtr = std::shared_ptr<BinaryDict>;
using DartsDictPtr = std::shared_ptr<DartsDict>;
#endif

} // namespace opencc
73
+
74
// Data directory prefix: PKGDATADIR followed by "/" when the build defines
// PKGDATADIR, otherwise an empty string.
#ifdef PKGDATADIR
const std::string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
#else  // ifdef PKGDATADIR
const std::string PACKAGE_DATA_DIRECTORY = "";
#endif // ifdef PKGDATADIR

// Fallback version string, used only when VERSION is not already defined.
#if !defined(VERSION)
#define VERSION "1.0.*"
#endif // !defined(VERSION)
@@ -0,0 +1,49 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+
23
+ namespace opencc {
24
+ /**
25
+ * Configuration loader
26
+ * @ingroup opencc_cpp_api
27
+ */
28
+ class OPENCC_EXPORT Config {
29
+ public:
30
+ Config();
31
+
32
+ virtual ~Config();
33
+
34
+ ConverterPtr NewFromString(const std::string& json,
35
+ const std::string& configDirectory);
36
+
37
+ ConverterPtr NewFromString(const std::string& json,
38
+ const std::vector<std::string>& paths);
39
+
40
+ ConverterPtr NewFromFile(const std::string& fileName);
41
+
42
+ ConverterPtr NewFromFile(const std::string& fileName,
43
+ const std::vector<std::string>& paths,
44
+ const char* argv0);
45
+
46
+ private:
47
+ void* internal;
48
+ };
49
+ } // namespace opencc
@@ -0,0 +1,47 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+ #include "Segmentation.hpp"
23
+
24
+ namespace opencc {
25
+ /**
26
+ * Conversion interface
27
+ * @ingroup opencc_cpp_api
28
+ */
29
+ class OPENCC_EXPORT Conversion {
30
+ public:
31
+ Conversion(DictPtr _dict) : dict(_dict) {}
32
+
33
+ // Convert single phrase
34
+ std::string Convert(const std::string& phrase) const;
35
+
36
+ // Convert single phrase
37
+ std::string Convert(const char* phrase) const;
38
+
39
+ // Convert segmented text
40
+ SegmentsPtr Convert(const SegmentsPtr& input) const;
41
+
42
+ const DictPtr GetDict() const { return dict; }
43
+
44
+ private:
45
+ const DictPtr dict;
46
+ };
47
+ } // namespace opencc
@@ -0,0 +1,43 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include <list>
22
+
23
+ #include "Common.hpp"
24
+ #include "Conversion.hpp"
25
+
26
+ namespace opencc {
27
+ /**
28
+ * Chain of conversions
29
+ * Consists of a list of conversions. Converts input in sequence.
30
+ * @ingroup opencc_cpp_api
31
+ */
32
+ class OPENCC_EXPORT ConversionChain {
33
+ public:
34
+ ConversionChain(const std::list<ConversionPtr> _conversions);
35
+
36
+ SegmentsPtr Convert(const SegmentsPtr& input) const;
37
+
38
+ const std::list<ConversionPtr> GetConversions() const { return conversions; }
39
+
40
+ private:
41
+ const std::list<ConversionPtr> conversions;
42
+ };
43
+ } // namespace opencc
@@ -0,0 +1,51 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+ #include "Segmentation.hpp"
23
+
24
+ namespace opencc {
25
+ /**
26
+ * Controller of segmentation and conversion
27
+ * @ingroup opencc_cpp_api
28
+ */
29
+ class OPENCC_EXPORT Converter {
30
+ public:
31
+ Converter(const std::string& _name, SegmentationPtr _segmentation,
32
+ ConversionChainPtr _conversionChain)
33
+ : name(_name), segmentation(_segmentation),
34
+ conversionChain(_conversionChain) {}
35
+
36
+ std::string Convert(const std::string& text) const;
37
+
38
+ size_t Convert(const char* input, char* output) const;
39
+
40
+ const SegmentationPtr GetSegmentation() const { return segmentation; }
41
+
42
+ const ConversionChainPtr GetConversionChain() const {
43
+ return conversionChain;
44
+ }
45
+
46
+ private:
47
+ const std::string name;
48
+ const SegmentationPtr segmentation;
49
+ const ConversionChainPtr conversionChain;
50
+ };
51
+ } // namespace opencc
@@ -0,0 +1,60 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+ #include "SerializableDict.hpp"
23
+
24
+ namespace opencc {
25
+ /**
26
+ * Darts dictionary
27
+ * @ingroup opencc_cpp_api
28
+ */
29
+ class OPENCC_EXPORT DartsDict : public Dict, public SerializableDict {
30
+ public:
31
+ virtual ~DartsDict();
32
+
33
+ virtual size_t KeyMaxLength() const;
34
+
35
+ virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
36
+
37
+ virtual Optional<const DictEntry*> MatchPrefix(const char* word,
38
+ size_t len) const;
39
+
40
+ virtual LexiconPtr GetLexicon() const;
41
+
42
+ virtual void SerializeToFile(FILE* fp) const;
43
+
44
+ /**
45
+ * Constructs a DartsDict from another dictionary.
46
+ */
47
+ static DartsDictPtr NewFromDict(const Dict& thatDict);
48
+
49
+ static DartsDictPtr NewFromFile(FILE* fp);
50
+
51
+ private:
52
+ DartsDict();
53
+
54
+ size_t maxLength;
55
+ LexiconPtr lexicon;
56
+
57
+ class DartsInternal;
58
+ DartsInternal* internal;
59
+ };
60
+ } // namespace opencc
@@ -0,0 +1,92 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+ #include "DictEntry.hpp"
23
+
24
+ namespace opencc {
25
+ /**
26
+ * Abstract class of dictionary
27
+ * @ingroup opencc_cpp_api
28
+ */
29
+ class OPENCC_EXPORT Dict {
30
+ public:
31
+ /**
32
+ * Matches a word exactly and returns the DictEntry or Optional::Null().
33
+ */
34
+ virtual Optional<const DictEntry*> Match(const char* word,
35
+ size_t len) const = 0;
36
+
37
+ /**
38
+ * Matches a word exactly and returns the DictEntry or Optional::Null().
39
+ */
40
+ Optional<const DictEntry*> Match(const std::string& word) const {
41
+ return Match(word.c_str(), word.length());
42
+ }
43
+
44
+ /**
45
+ * Matches the longest matched prefix of a word.
46
+ * For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
47
+ * the longest prefix of "banana" matched is "bana".
48
+ */
49
+ virtual Optional<const DictEntry*> MatchPrefix(const char* word,
50
+ size_t len) const;
51
+
52
+ /**
53
+ * Matches the longest matched prefix of a word.
54
+ */
55
+ Optional<const DictEntry*> MatchPrefix(const char* word) const {
56
+ return MatchPrefix(word, KeyMaxLength());
57
+ }
58
+
59
+ /**
60
+ * Matches the longest matched prefix of a word.
61
+ */
62
+ Optional<const DictEntry*> MatchPrefix(const std::string& word) const {
63
+ return MatchPrefix(word.c_str(), word.length());
64
+ }
65
+
66
+ /**
67
+ * Returns all matched prefixes of a word, sorted by the length (desc).
68
+ * For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
69
+ * all the matched prefixes of "banana" are "bana", "ban", "ba", "b".
70
+ */
71
+ virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
72
+ size_t len) const;
73
+
74
+ /**
75
+ * Returns all matched prefixes of a word, sorted by the length (desc).
76
+ */
77
+ std::vector<const DictEntry*>
78
+ MatchAllPrefixes(const std::string& word) const {
79
+ return MatchAllPrefixes(word.c_str(), word.length());
80
+ }
81
+
82
+ /**
83
+ * Returns the length of the longest key in the dictionary.
84
+ */
85
+ virtual size_t KeyMaxLength() const = 0;
86
+
87
+ /**
88
+ * Returns all entries in the dictionary.
89
+ */
90
+ virtual LexiconPtr GetLexicon() const = 0;
91
+ };
92
+ } // namespace opencc
@@ -0,0 +1,32 @@
1
+ /*
2
+ * Open Chinese Convert
3
+ *
4
+ * Copyright 2010-2017 Carbo Kuo <byvoid@byvoid.com>
5
+ *
6
+ * Licensed under the Apache License, Version 2.0 (the "License");
7
+ * you may not use this file except in compliance with the License.
8
+ * You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing, software
13
+ * distributed under the License is distributed on an "AS IS" BASIS,
14
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ * See the License for the specific language governing permissions and
16
+ * limitations under the License.
17
+ */
18
+
19
+ #pragma once
20
+
21
+ #include "Common.hpp"
22
+
23
+ namespace opencc {
24
+ /**
25
+ * Converts a dictionary from a format to another.
26
+ * @ingroup opencc_cpp_api
27
+ */
28
+ OPENCC_EXPORT void ConvertDictionary(const std::string& inputFileName,
29
+ const std::string& outputFileName,
30
+ const std::string& formatFrom,
31
+ const std::string& formatTo);
32
+ } // namespace opencc