sentencepiece 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52d4bb0f1a0b4a68c9db252911fe0661b7d8042d2d976a17506e0eaf0005d48f
4
- data.tar.gz: 793b8b3e47cb6a9c1ab6b05b4cbef264b16eef7b632699983de9f8c40056d9ac
3
+ metadata.gz: 63aaf69a72781f0087072db8197b9ae3a126b63e632b52bded5b483ff7b2c6b4
4
+ data.tar.gz: 5ffbd48ed587ef57fd3b427615cc21f4d79804ea90598f9db2178ceff702ca57
5
5
  SHA512:
6
- metadata.gz: 9702468ce33efdf7ae2ac3735cec983ae58a50677357a2d6cbff88089e73bf40b81ef9872b9860474c948fed3f5920a093252e90fbdea2d5e188c41057c7923e
7
- data.tar.gz: ba262d347b32364255ecbbce8306b2d126296b46e6aac7828903c0dc9d6485702ef153ed7aea83a79eb69b5bfc655482787f6bb1c3ad086215f9106658a3a3e8
6
+ metadata.gz: 61b91c838bbdb2f7c47166036b890b545060361e91a66e6cf416206f53d84595cc74052a38df083ad507a6f219d0b2b8ba3667095d2d3514b5ba0c3660967ee5
7
+ data.tar.gz: 467c2f252e23fbfe684486d42af9fe0eedd058dd99361fc5652bb83412ae801571e5e25879b2f1bb6734c997b9c04f4287378c595afa3698b06885631c8fdcae
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.0] - 2023-03-26
4
+
5
+ - Add API documentation.
6
+ - Add type signatures.
7
+
3
8
  ## [0.0.2] - 2023-03-26
4
9
 
5
10
  - Add SentencePieceTrainer class.
data/README.md CHANGED
@@ -3,12 +3,11 @@
3
3
  [![Build Status](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml/badge.svg)](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml)
4
4
  [![Gem Version](https://badge.fury.io/rb/sentencepiece.svg)](https://badge.fury.io/rb/sentencepiece)
5
5
  [![License](https://img.shields.io/badge/License-Apache%202.0-yellowgreen.svg)](https://github.com/yoshoku/sentencepiece.rb/blob/main/LICENSE.txt)
6
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/sentencepiece.rb/doc/)
6
7
 
7
8
  sentencepiece.rb provides Ruby bindings for [SentencePiece](https://github.com/google/sentencepiece),
8
9
  an unsupervised text tokenizer and detokenizer for neural network-based text generation.
9
10
 
10
- It is still **under development** and may undergo many changes in the future.
11
-
12
11
  ## Installation
13
12
 
14
13
  Install SentencePiece using your OS package manager;
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # sentencepiece.rb provides Ruby bindings for SentencePiece.
3
4
  module SentencePiece
4
- VERSION = '0.0.2'
5
+ # The version of sentencepiece.rb you install.
6
+ VERSION = '0.1.0'
5
7
  end
@@ -1,4 +1,63 @@
1
1
  module SentencePiece
2
2
  VERSION: String
3
- # See the writing guide of rbs: https://github.com/ruby/rbs#guides
3
+
4
+ class SentencePieceTrainer
5
+ def self.train: (String args) -> void
6
+ end
7
+
8
+ class SentencePieceProcessor
9
+ def initialize: (?model_file: String model_file) -> void
10
+
11
+ def load: (String model_file) -> void
12
+
13
+ def encode: (String text, ?out_type: String out_type) -> (Array[Integer] | Array[String])
14
+ | (Array[String] text, ?out_type: String out_type) -> (Array[Array[Integer]] | Array[Array[String]])
15
+
16
+ def encode_as_ids: (String text) -> Array[Integer]
17
+
18
+ def encode_as_pieces: (String text) -> Array[String]
19
+
20
+ def encode_as_serialized_proto: (String text) -> String
21
+
22
+ def nbest_encode_as_ids: (String text, nbest_size: Integer nbest_size) -> Array[Array[Integer]]
23
+
24
+ def nbest_encode_as_pieces: (String text, nbest_size: Integer nbest_size) -> Array[Array[String]]
25
+
26
+ def nbest_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size) -> String
27
+
28
+ def sample_encode_as_ids: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[Integer]
29
+
30
+ def sample_encode_as_pieces: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[String]
31
+
32
+ def sample_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> String
33
+
34
+ def decode: (Array[Integer], ?out_type: String out_type) -> String
35
+ | (Array[Array[Integer]], ?out_type: String out_type) -> Array[String]
36
+ | (Array[String], ?out_type: String out_type) -> String
37
+ | (Array[Array[String]], ?out_type: String out_type) -> Array[String]
38
+
39
+ def decode_ids: (Array[Integer]) -> String
40
+
41
+ def decode_ids_as_serialized_proto: (Array[Integer] ids) -> String
42
+
43
+ def decode_pieces: (Array[String]) -> String
44
+
45
+ def decode_pieces_as_serialized_proto: (Array[String] pieces) -> String
46
+
47
+ def id_to_piece: (Integer id) -> String
48
+ | (Array[Integer] ids) -> Array[String]
49
+
50
+ def piece_to_id: (String piece) -> Integer
51
+ | (Array[String] pieces) -> Array[Integer]
52
+
53
+ def piece_size: () -> Integer
54
+
55
+ def bos_id: () -> Integer
56
+
57
+ def eos_id: () -> Integer
58
+
59
+ def pad_id: () -> Integer
60
+
61
+ def unk_id: () -> Integer
62
+ end
4
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sentencepiece
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
@@ -36,6 +36,7 @@ metadata:
36
36
  homepage_uri: https://github.com/yoshoku/sentencepiece.rb
37
37
  source_code_uri: https://github.com/yoshoku/sentencepiece.rb
38
38
  changelog_uri: https://github.com/yoshoku/sentencepiece.rb/blob/main/CHANGELOG.md
39
+ documentation_uri: https://yoshoku.github.io/sentencepiece.rb/doc/
39
40
  rubygems_mfa_required: 'true'
40
41
  post_install_message:
41
42
  rdoc_options: []