sentencepiece 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +1 -2
- data/lib/sentencepiece/version.rb +3 -1
- data/sig/sentencepiece.rbs +60 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63aaf69a72781f0087072db8197b9ae3a126b63e632b52bded5b483ff7b2c6b4
|
4
|
+
data.tar.gz: 5ffbd48ed587ef57fd3b427615cc21f4d79804ea90598f9db2178ceff702ca57
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61b91c838bbdb2f7c47166036b890b545060361e91a66e6cf416206f53d84595cc74052a38df083ad507a6f219d0b2b8ba3667095d2d3514b5ba0c3660967ee5
|
7
|
+
data.tar.gz: 467c2f252e23fbfe684486d42af9fe0eedd058dd99361fc5652bb83412ae801571e5e25879b2f1bb6734c997b9c04f4287378c595afa3698b06885631c8fdcae
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
[Build Status](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml)
|
4
4
|
[Gem Version](https://badge.fury.io/rb/sentencepiece)
|
5
5
|
[License](https://github.com/yoshoku/sentencepiece.rb/blob/main/LICENSE.txt)
|
6
|
+
[Documentation](https://yoshoku.github.io/sentencepiece.rb/doc/)
|
6
7
|
|
7
8
|
sentencepiece.rb provides Ruby bindings for the [SentencePiece](https://github.com/google/sentencepiece),
|
8
9
|
an unsupervised text tokenizer and detokenizer for neural network-based text generation.
|
9
10
|
|
10
|
-
It is still **under development** and may undergo many changes in the future.
|
11
|
-
|
12
11
|
## Installation
|
13
12
|
|
14
13
|
Install SentencePiece using your OS package manager;
|
data/sig/sentencepiece.rbs
CHANGED
@@ -1,4 +1,63 @@
|
|
1
1
|
module SentencePiece
|
2
2
|
VERSION: String
|
3
|
-
|
3
|
+
|
4
|
+
class SentencePieceTrainer
|
5
|
+
def self.train: (String args) -> void
|
6
|
+
end
|
7
|
+
|
8
|
+
class SentencePieceProcessor
|
9
|
+
def initialize: (?model_file: String model_file) -> void
|
10
|
+
|
11
|
+
def load: (String model_file) -> void
|
12
|
+
|
13
|
+
def encode: (String text, ?out_type: String out_type) -> (Array[Integer] | Array[String])
|
14
|
+
| (Array[String] text, ?out_type: String out_type) -> (Array[Array[Integer]] | Array[Array[String]])
|
15
|
+
|
16
|
+
def encode_as_ids: (String text) -> Array[Integer]
|
17
|
+
|
18
|
+
def encode_as_pieces: (String text) -> Array[String]
|
19
|
+
|
20
|
+
def encode_as_serialized_proto: (String text) -> String
|
21
|
+
|
22
|
+
def nbest_encode_as_ids: (String text, nbest_size: Integer nbest_size) -> Array[Array[Integer]]
|
23
|
+
|
24
|
+
def nbest_encode_as_pieces: (String text, nbest_size: Integer nbest_size) -> Array[Array[String]]
|
25
|
+
|
26
|
+
def nbest_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size) -> String
|
27
|
+
|
28
|
+
def sample_encode_as_ids: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[Integer]
|
29
|
+
|
30
|
+
def sample_encode_as_pieces: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[String]
|
31
|
+
|
32
|
+
def sample_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> String
|
33
|
+
|
34
|
+
def decode: (Array[Integer], ?out_type: String out_type) -> String
|
35
|
+
| (Array[Array[Integer]], ?out_type: String out_type) -> Array[String]
|
36
|
+
| (Array[String], ?out_type: String out_type) -> String
|
37
|
+
| (Array[Array[String]], ?out_type: String out_type) -> Array[String]
|
38
|
+
|
39
|
+
def decode_ids: (Array[Integer]) -> String
|
40
|
+
|
41
|
+
def decode_ids_as_serialized_proto: (Array[Integer] ids) -> String
|
42
|
+
|
43
|
+
def decode_pieces: (Array[String]) -> String
|
44
|
+
|
45
|
+
def decode_pieces_as_serialized_proto: (Array[String] pieces) -> String
|
46
|
+
|
47
|
+
def id_to_piece: (Integer id) -> String
|
48
|
+
| (Array[Integer] ids) -> Array[String]
|
49
|
+
|
50
|
+
def piece_to_id: (String piece) -> Integer
|
51
|
+
| (Array[String] pieces) -> Array[Integer]
|
52
|
+
|
53
|
+
def piece_size: () -> Integer
|
54
|
+
|
55
|
+
def bos_id: () -> Integer
|
56
|
+
|
57
|
+
def eos_id: () -> Integer
|
58
|
+
|
59
|
+
def pad_id: () -> Integer
|
60
|
+
|
61
|
+
def unk_id: () -> Integer
|
62
|
+
end
|
4
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentencepiece
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
@@ -36,6 +36,7 @@ metadata:
|
|
36
36
|
homepage_uri: https://github.com/yoshoku/sentencepiece.rb
|
37
37
|
source_code_uri: https://github.com/yoshoku/sentencepiece.rb
|
38
38
|
changelog_uri: https://github.com/yoshoku/sentencepiece.rb/blob/main/CHANGELOG.md
|
39
|
+
documentation_uri: https://yoshoku.github.io/sentencepiece.rb/doc/
|
39
40
|
rubygems_mfa_required: 'true'
|
40
41
|
post_install_message:
|
41
42
|
rdoc_options: []
|