sentencepiece 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +1 -2
- data/lib/sentencepiece/version.rb +3 -1
- data/sig/sentencepiece.rbs +60 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63aaf69a72781f0087072db8197b9ae3a126b63e632b52bded5b483ff7b2c6b4
|
4
|
+
data.tar.gz: 5ffbd48ed587ef57fd3b427615cc21f4d79804ea90598f9db2178ceff702ca57
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61b91c838bbdb2f7c47166036b890b545060361e91a66e6cf416206f53d84595cc74052a38df083ad507a6f219d0b2b8ba3667095d2d3514b5ba0c3660967ee5
|
7
|
+
data.tar.gz: 467c2f252e23fbfe684486d42af9fe0eedd058dd99361fc5652bb83412ae801571e5e25879b2f1bb6734c997b9c04f4287378c595afa3698b06885631c8fdcae
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
[![Build Status](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml/badge.svg)](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml)
|
4
4
|
[![Gem Version](https://badge.fury.io/rb/sentencepiece.svg)](https://badge.fury.io/rb/sentencepiece)
|
5
5
|
[![License](https://img.shields.io/badge/License-Apache%202.0-yellowgreen.svg)](https://github.com/yoshoku/sentencepiece.rb/blob/main/LICENSE.txt)
|
6
|
+
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/sentencepiece.rb/doc/)
|
6
7
|
|
7
8
|
sentencepiece.rb provides Ruby bindings for the [SentencePiece](https://github.com/google/sentencepiece),
|
8
9
|
an unsupervised text tokenizer and detokenizer for neural network-based text generation.
|
9
10
|
|
10
|
-
It is still **under development** and may undergo many changes in the future.
|
11
|
-
|
12
11
|
## Installation
|
13
12
|
|
14
13
|
Install SentencePiece using your OS package manager;
|
data/sig/sentencepiece.rbs
CHANGED
@@ -1,4 +1,63 @@
|
|
1
1
|
module SentencePiece
|
2
2
|
VERSION: String
|
3
|
-
|
3
|
+
|
4
|
+
class SentencePieceTrainer
|
5
|
+
def self.train: (String args) -> void
|
6
|
+
end
|
7
|
+
|
8
|
+
class SentencePieceProcessor
|
9
|
+
def initialize: (?model_file: String model_file) -> void
|
10
|
+
|
11
|
+
def load: (String model_file) -> void
|
12
|
+
|
13
|
+
def encode: (String text, ?out_type: String out_type) -> (Array[Integer] | Array[String])
|
14
|
+
| (Array[String] text, ?out_type: String out_type) -> (Array[Array[Integer]] | Array[Array[String]])
|
15
|
+
|
16
|
+
def encode_as_ids: (String text) -> Array[Integer]
|
17
|
+
|
18
|
+
def encode_as_pieces: (String text) -> Array[String]
|
19
|
+
|
20
|
+
def encode_as_serialized_proto: (String text) -> String
|
21
|
+
|
22
|
+
def nbest_encode_as_ids: (String text, nbest_size: Integer nbest_size) -> Array[Array[Integer]]
|
23
|
+
|
24
|
+
def nbest_encode_as_pieces: (String text, nbest_size: Integer nbest_size) -> Array[Array[String]]
|
25
|
+
|
26
|
+
def nbest_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size) -> String
|
27
|
+
|
28
|
+
def sample_encode_as_ids: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[Integer]
|
29
|
+
|
30
|
+
def sample_encode_as_pieces: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[String]
|
31
|
+
|
32
|
+
def sample_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> String
|
33
|
+
|
34
|
+
def decode: (Array[Integer], ?out_type: String out_type) -> String
|
35
|
+
| (Array[Array[Integer]], ?out_type: String out_type) -> Array[String]
|
36
|
+
| (Array[String], ?out_type: String out_type) -> String
|
37
|
+
| (Array[Array[String]], ?out_type: String out_type) -> Array[String]
|
38
|
+
|
39
|
+
def decode_ids: (Array[Integer]) -> String
|
40
|
+
|
41
|
+
def decode_ids_as_serialized_proto: (Array[Integer] ids) -> String
|
42
|
+
|
43
|
+
def decode_pieces: (Array[String]) -> String
|
44
|
+
|
45
|
+
def decode_pieces_as_serialized_proto: (Array[String] pieces) -> String
|
46
|
+
|
47
|
+
def id_to_piece: (Integer id) -> String
|
48
|
+
| (Array[Integer] ids) -> Array[String]
|
49
|
+
|
50
|
+
def piece_to_id: (String piece) -> Integer
|
51
|
+
| (Array[String] pieces) -> Array[Integer]
|
52
|
+
|
53
|
+
def piece_size: () -> Integer
|
54
|
+
|
55
|
+
def bos_id: () -> Integer
|
56
|
+
|
57
|
+
def eos_id: () -> Integer
|
58
|
+
|
59
|
+
def pad_id: () -> Integer
|
60
|
+
|
61
|
+
def unk_id: () -> Integer
|
62
|
+
end
|
4
63
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sentencepiece
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
@@ -36,6 +36,7 @@ metadata:
|
|
36
36
|
homepage_uri: https://github.com/yoshoku/sentencepiece.rb
|
37
37
|
source_code_uri: https://github.com/yoshoku/sentencepiece.rb
|
38
38
|
changelog_uri: https://github.com/yoshoku/sentencepiece.rb/blob/main/CHANGELOG.md
|
39
|
+
documentation_uri: https://yoshoku.github.io/sentencepiece.rb/doc/
|
39
40
|
rubygems_mfa_required: 'true'
|
40
41
|
post_install_message:
|
41
42
|
rdoc_options: []
|