sentencepiece 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52d4bb0f1a0b4a68c9db252911fe0661b7d8042d2d976a17506e0eaf0005d48f
4
- data.tar.gz: 793b8b3e47cb6a9c1ab6b05b4cbef264b16eef7b632699983de9f8c40056d9ac
3
+ metadata.gz: 63aaf69a72781f0087072db8197b9ae3a126b63e632b52bded5b483ff7b2c6b4
4
+ data.tar.gz: 5ffbd48ed587ef57fd3b427615cc21f4d79804ea90598f9db2178ceff702ca57
5
5
  SHA512:
6
- metadata.gz: 9702468ce33efdf7ae2ac3735cec983ae58a50677357a2d6cbff88089e73bf40b81ef9872b9860474c948fed3f5920a093252e90fbdea2d5e188c41057c7923e
7
- data.tar.gz: ba262d347b32364255ecbbce8306b2d126296b46e6aac7828903c0dc9d6485702ef153ed7aea83a79eb69b5bfc655482787f6bb1c3ad086215f9106658a3a3e8
6
+ metadata.gz: 61b91c838bbdb2f7c47166036b890b545060361e91a66e6cf416206f53d84595cc74052a38df083ad507a6f219d0b2b8ba3667095d2d3514b5ba0c3660967ee5
7
+ data.tar.gz: 467c2f252e23fbfe684486d42af9fe0eedd058dd99361fc5652bb83412ae801571e5e25879b2f1bb6734c997b9c04f4287378c595afa3698b06885631c8fdcae
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.0] - 2023-03-26
4
+
5
+ - Add API documentation.
6
+ - Add type signatures.
7
+
3
8
  ## [0.0.2] - 2023-03-26
4
9
 
5
10
  - Add SentencePieceTrainer class.
data/README.md CHANGED
@@ -3,12 +3,11 @@
3
3
  [![Build Status](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml/badge.svg)](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml)
4
4
  [![Gem Version](https://badge.fury.io/rb/sentencepiece.svg)](https://badge.fury.io/rb/sentencepiece)
5
5
  [![License](https://img.shields.io/badge/License-Apache%202.0-yellowgreen.svg)](https://github.com/yoshoku/sentencepiece.rb/blob/main/LICENSE.txt)
6
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/sentencepiece.rb/doc/)
6
7
 
7
8
  sentencepiece.rb provides Ruby bindings for the [SentencePiece](https://github.com/google/sentencepiece),
8
9
  an unsupervised text tokenizer and detokenizer for neural network-based text generation.
9
10
 
10
- It is still **under development** and may undergo many changes in the future.
11
-
12
11
  ## Installation
13
12
 
14
13
  Install SentencePiece using your OS package manager;
data/lib/sentencepiece/version.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # sentencepiece.rb provides Ruby bindings for the SentencePiece.
3
4
  module SentencePiece
4
- VERSION = '0.0.2'
5
+ # The version of sentencepiece.rb you install.
6
+ VERSION = '0.1.0'
5
7
  end
data/sig/sentencepiece.rbs CHANGED
@@ -1,4 +1,63 @@
1
1
  module SentencePiece
2
2
  VERSION: String
3
- # See the writing guide of rbs: https://github.com/ruby/rbs#guides
3
+
4
+ class SentencePieceTrainer
5
+ def self.train: (String args) -> void
6
+ end
7
+
8
+ class SentencePieceProcessor
9
+ def initialize: (?model_file: String model_file) -> void
10
+
11
+ def load: (String model_file) -> void
12
+
13
+ def encode: (String text, ?out_type: String out_type) -> (Array[Integer] | Array[String])
14
+ | (Array[String] text, ?out_type: String out_type) -> (Array[Array[Integer]] | Array[Array[String]])
15
+
16
+ def encode_as_ids: (String text) -> Array[Integer]
17
+
18
+ def encode_as_pieces: (String text) -> Array[String]
19
+
20
+ def encode_as_serialized_proto: (String text) -> String
21
+
22
+ def nbest_encode_as_ids: (String text, nbest_size: Integer nbest_size) -> Array[Array[Integer]]
23
+
24
+ def nbest_encode_as_pieces: (String text, nbest_size: Integer nbest_size) -> Array[Array[String]]
25
+
26
+ def nbest_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size) -> String
27
+
28
+ def sample_encode_as_ids: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[Integer]
29
+
30
+ def sample_encode_as_pieces: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[String]
31
+
32
+ def sample_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> String
33
+
34
+ def decode: (Array[Integer], ?out_type: String out_type) -> String
35
+ | (Array[Array[Integer]], ?out_type: String out_type) -> Array[String]
36
+ | (Array[String], ?out_type: String out_type) -> String
37
+ | (Array[Array[String]], ?out_type: String out_type) -> Array[String]
38
+
39
+ def decode_ids: (Array[Integer]) -> String
40
+
41
+ def decode_ids_as_serialized_proto: (Array[Integer] ids) -> String
42
+
43
+ def decode_pieces: (Array[String]) -> String
44
+
45
+ def decode_pieces_as_serialized_proto: (Array[String] pieces) -> String
46
+
47
+ def id_to_piece: (Integer id) -> String
48
+ | (Array[Integer] ids) -> Array[String]
49
+
50
+ def piece_to_id: (String piece) -> Integer
51
+ | (Array[String] pieces) -> Array[Integer]
52
+
53
+ def piece_size: () -> Integer
54
+
55
+ def bos_id: () -> Integer
56
+
57
+ def eos_id: () -> Integer
58
+
59
+ def pad_id: () -> Integer
60
+
61
+ def unk_id: () -> Integer
62
+ end
4
63
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sentencepiece
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
@@ -36,6 +36,7 @@ metadata:
36
36
  homepage_uri: https://github.com/yoshoku/sentencepiece.rb
37
37
  source_code_uri: https://github.com/yoshoku/sentencepiece.rb
38
38
  changelog_uri: https://github.com/yoshoku/sentencepiece.rb/blob/main/CHANGELOG.md
39
+ documentation_uri: https://yoshoku.github.io/sentencepiece.rb/doc/
39
40
  rubygems_mfa_required: 'true'
40
41
  post_install_message:
41
42
  rdoc_options: []