sentencepiece 0.0.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52d4bb0f1a0b4a68c9db252911fe0661b7d8042d2d976a17506e0eaf0005d48f
4
- data.tar.gz: 793b8b3e47cb6a9c1ab6b05b4cbef264b16eef7b632699983de9f8c40056d9ac
3
+ metadata.gz: 45cc838c2108753b87e03e3aee4bf3665f1e8c6e0cd407e5a68355b44657c582
4
+ data.tar.gz: 14d29d47f2a78b57218ceaf45c12a30de270f000c5d6d756b6b66a767a2ee128
5
5
  SHA512:
6
- metadata.gz: 9702468ce33efdf7ae2ac3735cec983ae58a50677357a2d6cbff88089e73bf40b81ef9872b9860474c948fed3f5920a093252e90fbdea2d5e188c41057c7923e
7
- data.tar.gz: ba262d347b32364255ecbbce8306b2d126296b46e6aac7828903c0dc9d6485702ef153ed7aea83a79eb69b5bfc655482787f6bb1c3ad086215f9106658a3a3e8
6
+ metadata.gz: ac0b780ec734901f030bbfdcf91949401b40064d89480fa54d303d7d83f707e42e0ba897cf342ea4650402b350deac2da4c1017cac7c278adb1272e5cee0834e
7
+ data.tar.gz: b03e10826137a4d3d1002906af0e8e46014eafed08da76fb5494e7b5311deaf002b40b7771b7299ef1b39cb2349e1fa9f5f3262020479d2655a2ede9c3db4f03
data/CHANGELOG.md CHANGED
@@ -1,6 +1,13 @@
1
- ## [Unreleased]
1
+ ## [[0.2.0](https://github.com/yoshoku/sentencepiece.rb/compare/v0.1.0...v0.2.0)] - 2025-12-29
2
2
 
3
- ## [0.0.2] - 2023-03-26
3
+ - Change to specify `c++17` with -std option as recent sentencepiece uses `std::string_view`.
4
+
5
+ ## [[0.1.0](https://github.com/yoshoku/sentencepiece.rb/compare/v0.0.2...v0.1.0)] - 2023-03-26
6
+
7
+ - Add API documentation.
8
+ - Add type signatures.
9
+
10
+ ## [[0.0.2](https://github.com/yoshoku/sentencepiece.rb/compare/v0.0.1...v0.0.2)] - 2023-03-26
4
11
 
5
12
  - Add SentencePieceTrainer class.
6
13
  - Add some encoding and decoding methods to SentencePieceProcessor.
data/README.md CHANGED
@@ -3,12 +3,11 @@
3
3
  [![Build Status](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml/badge.svg)](https://github.com/yoshoku/sentencepiece.rb/actions/workflows/main.yml)
4
4
  [![Gem Version](https://badge.fury.io/rb/sentencepiece.svg)](https://badge.fury.io/rb/sentencepiece)
5
5
  [![License](https://img.shields.io/badge/License-Apache%202.0-yellowgreen.svg)](https://github.com/yoshoku/sentencepiece.rb/blob/main/LICENSE.txt)
6
+ [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/sentencepiece.rb/doc/)
6
7
 
7
8
  sentencepiece.rb provides Ruby bindings for the [SentencePiece](https://github.com/google/sentencepiece),
8
9
  an unsupervised text tokenizer and detokenizer for neural network-based text generation.
9
10
 
10
- It is still **under development** and may undergo many changes in the future.
11
-
12
11
  ## Installation
13
12
 
14
13
  Install SentencePiece using your OS package manager;
@@ -9,6 +9,10 @@ abort 'libsentencepiece_train is not found.' unless have_library('sentencepiece_
9
9
  # abort 'sentencepiece_processor.h is not found.' unless have_header('sentencepiece_processor.h')
10
10
  # abort 'sentencepiece_trainer.h is not found.' unless have_header('sentencepiece_trainer.h')
11
11
 
12
- $CXXFLAGS << ' -std=c++11'
12
+ $CXXFLAGS << if /mswin/ =~ RUBY_PLATFORM
13
+ '/std:c++17'
14
+ else
15
+ ' -std=c++17'
16
+ end
13
17
 
14
18
  create_makefile('sentencepiece/sentencepiece')
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * sentencepiece.rb provides Ruby bindings for the SentencePiece.
3
3
  *
4
- * Copyright (c) 2023 Atsushi Tatsuma
4
+ * Copyright (c) 2023-2025 Atsushi Tatsuma
5
5
  *
6
6
  * Licensed under the Apache License, Version 2.0 (the "License");
7
7
  * you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * sentencepiece.rb provides Ruby bindings for the SentencePiece.
3
3
  *
4
- * Copyright (c) 2023 Atsushi Tatsuma
4
+ * Copyright (c) 2023-2025 Atsushi Tatsuma
5
5
  *
6
6
  * Licensed under the Apache License, Version 2.0 (the "License");
7
7
  * you may not use this file except in compliance with the License.
@@ -79,6 +79,7 @@ public:
79
79
  rb_define_method(rb_cSentencePieceProcessor, "bos_id", RUBY_METHOD_FUNC(_sentencepiece_processor_bos_id), 0);
80
80
  rb_define_method(rb_cSentencePieceProcessor, "eos_id", RUBY_METHOD_FUNC(_sentencepiece_processor_eos_id), 0);
81
81
  rb_define_method(rb_cSentencePieceProcessor, "pad_id", RUBY_METHOD_FUNC(_sentencepiece_processor_pad_id), 0);
82
+ rb_define_alias(rb_cSentencePieceProcessor, "vocab_size", "piece_size");
82
83
  return rb_cSentencePieceProcessor;
83
84
  };
84
85
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # sentencepiece.rb provides Ruby bindings for the SentencePiece.
3
4
  module SentencePiece
4
- VERSION = '0.0.2'
5
+ # The version of sentencepiece.rb you install.
6
+ VERSION = '0.2.0'
5
7
  end
@@ -1,4 +1,63 @@
1
1
  module SentencePiece
2
2
  VERSION: String
3
- # See the writing guide of rbs: https://github.com/ruby/rbs#guides
3
+
4
+ class SentencePieceTrainer
5
+ def self.train: (String args) -> void
6
+ end
7
+
8
+ class SentencePieceProcessor
9
+ def initialize: (?model_file: String model_file) -> void
10
+
11
+ def load: (String model_file) -> void
12
+
13
+ def encode: (String text, ?out_type: String out_type) -> (Array[Integer] | Array[String])
14
+ | (Array[String] text, ?out_type: String out_type) -> (Array[Array[Integer]] | Array[Array[String]])
15
+
16
+ def encode_as_ids: (String text) -> Array[Integer]
17
+
18
+ def encode_as_pieces: (String text) -> Array[String]
19
+
20
+ def encode_as_serialized_proto: (String text) -> String
21
+
22
+ def nbest_encode_as_ids: (String text, nbest_size: Integer nbest_size) -> Array[Array[Integer]]
23
+
24
+ def nbest_encode_as_pieces: (String text, nbest_size: Integer nbest_size) -> Array[Array[String]]
25
+
26
+ def nbest_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size) -> String
27
+
28
+ def sample_encode_as_ids: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[Integer]
29
+
30
+ def sample_encode_as_pieces: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> Array[String]
31
+
32
+ def sample_encode_as_serialized_proto: (String text, nbest_size: Integer nbest_size, alpha: Float alpha) -> String
33
+
34
+ def decode: (Array[Integer], ?out_type: String out_type) -> String
35
+ | (Array[Array[Integer]], ?out_type: String out_type) -> Array[String]
36
+ | (Array[String], ?out_type: String out_type) -> String
37
+ | (Array[Array[String]], ?out_type: String out_type) -> Array[String]
38
+
39
+ def decode_ids: (Array[Integer]) -> String
40
+
41
+ def decode_ids_as_serialized_proto: (Array[Integer] ids) -> String
42
+
43
+ def decode_pieces: (Array[String]) -> String
44
+
45
+ def decode_pieces_as_serialized_proto: (Array[String] pieces) -> String
46
+
47
+ def id_to_piece: (Integer id) -> String
48
+ | (Array[Integer] ids) -> Array[String]
49
+
50
+ def piece_to_id: (String piece) -> Integer
51
+ | (Array[String] pieces) -> Array[Integer]
52
+
53
+ def piece_size: () -> Integer
54
+
55
+ def bos_id: () -> Integer
56
+
57
+ def eos_id: () -> Integer
58
+
59
+ def pad_id: () -> Integer
60
+
61
+ def unk_id: () -> Integer
62
+ end
4
63
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sentencepiece
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2023-03-26 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies: []
13
12
  description: |
14
13
  sentencepiece.rb provides Ruby bindings for the SentencePiece,
@@ -36,8 +35,8 @@ metadata:
36
35
  homepage_uri: https://github.com/yoshoku/sentencepiece.rb
37
36
  source_code_uri: https://github.com/yoshoku/sentencepiece.rb
38
37
  changelog_uri: https://github.com/yoshoku/sentencepiece.rb/blob/main/CHANGELOG.md
38
+ documentation_uri: https://yoshoku.github.io/sentencepiece.rb/doc/
39
39
  rubygems_mfa_required: 'true'
40
- post_install_message:
41
40
  rdoc_options: []
42
41
  require_paths:
43
42
  - lib
@@ -52,8 +51,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
51
  - !ruby/object:Gem::Version
53
52
  version: '0'
54
53
  requirements: []
55
- rubygems_version: 3.3.26
56
- signing_key:
54
+ rubygems_version: 4.0.3
57
55
  specification_version: 4
58
56
  summary: Ruby bindings for the SentencePiece
59
57
  test_files: []