tiktoken_ruby 0.0.2-x86_64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.standard.yml +3 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +77 -0
- data/LICENSE.txt +21 -0
- data/README.md +37 -0
- data/Rakefile +24 -0
- data/lib/tiktoken_ruby/2.7/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/3.0/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/3.1/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/3.2/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/encoding.rb +23 -0
- data/lib/tiktoken_ruby/version.rb +5 -0
- data/lib/tiktoken_ruby.rb +15 -0
- data/sig/tiktoken_ruby.rbs +4 -0
- metadata +63 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 21aeea521385ab903ffc741a21225dcd60293baf365d54dc856c143936ae0497
|
4
|
+
data.tar.gz: c1cc82c0cbba82dc61f254cfe2dfd6003d07322c542f5cbfefb80ab0894e0f99
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: db397a144bcad2221077514991590ee0fc0620ed730930d809c239a97ef58084249f55f4a24e0fe48dc22bb7af16c60f06df7e79e82f01adefca28b2d8f56bf8
|
7
|
+
data.tar.gz: c63808336c8c94120a94b219b40b923382ede0b0f4cf73d0fdc31c589acef3526f73db672c9c33d3e032f19b73e9e1e1bce0320e5aa8edff534dbf39ba7d9027
|
data/.rspec
ADDED
data/.standard.yml
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
# Specify your gem's dependencies in tiktoken_ruby.gemspec
|
6
|
+
gemspec
|
7
|
+
|
8
|
+
gem "rake", "~> 13.0"
|
9
|
+
|
10
|
+
gem "rake-compiler"
|
11
|
+
gem "rb_sys"
|
12
|
+
|
13
|
+
gem "rspec", "~> 3.0"
|
14
|
+
|
15
|
+
gem "standard", "~> 1.3"
|
16
|
+
gem 'pry', '~> 0.14.2'
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
tiktoken_ruby (0.0.2)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ast (2.4.2)
|
10
|
+
coderay (1.1.3)
|
11
|
+
diff-lcs (1.5.0)
|
12
|
+
json (2.6.3)
|
13
|
+
language_server-protocol (3.17.0.3)
|
14
|
+
method_source (1.0.0)
|
15
|
+
parallel (1.22.1)
|
16
|
+
parser (3.2.1.1)
|
17
|
+
ast (~> 2.4.1)
|
18
|
+
pry (0.14.2)
|
19
|
+
coderay (~> 1.1)
|
20
|
+
method_source (~> 1.0)
|
21
|
+
rainbow (3.1.1)
|
22
|
+
rake (13.0.6)
|
23
|
+
rake-compiler (1.2.1)
|
24
|
+
rake
|
25
|
+
rb_sys (0.9.68)
|
26
|
+
regexp_parser (2.7.0)
|
27
|
+
rexml (3.2.5)
|
28
|
+
rspec (3.12.0)
|
29
|
+
rspec-core (~> 3.12.0)
|
30
|
+
rspec-expectations (~> 3.12.0)
|
31
|
+
rspec-mocks (~> 3.12.0)
|
32
|
+
rspec-core (3.12.1)
|
33
|
+
rspec-support (~> 3.12.0)
|
34
|
+
rspec-expectations (3.12.2)
|
35
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
36
|
+
rspec-support (~> 3.12.0)
|
37
|
+
rspec-mocks (3.12.4)
|
38
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
39
|
+
rspec-support (~> 3.12.0)
|
40
|
+
rspec-support (3.12.0)
|
41
|
+
rubocop (1.48.1)
|
42
|
+
json (~> 2.3)
|
43
|
+
parallel (~> 1.10)
|
44
|
+
parser (>= 3.2.0.0)
|
45
|
+
rainbow (>= 2.2.2, < 4.0)
|
46
|
+
regexp_parser (>= 1.8, < 3.0)
|
47
|
+
rexml (>= 3.2.5, < 4.0)
|
48
|
+
rubocop-ast (>= 1.26.0, < 2.0)
|
49
|
+
ruby-progressbar (~> 1.7)
|
50
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
51
|
+
rubocop-ast (1.27.0)
|
52
|
+
parser (>= 3.2.1.0)
|
53
|
+
rubocop-performance (1.16.0)
|
54
|
+
rubocop (>= 1.7.0, < 2.0)
|
55
|
+
rubocop-ast (>= 0.4.0)
|
56
|
+
ruby-progressbar (1.13.0)
|
57
|
+
standard (1.25.1)
|
58
|
+
language_server-protocol (~> 3.17.0.2)
|
59
|
+
rubocop (= 1.48.1)
|
60
|
+
rubocop-performance (= 1.16.0)
|
61
|
+
unicode-display_width (2.4.2)
|
62
|
+
|
63
|
+
PLATFORMS
|
64
|
+
arm64-darwin-22
|
65
|
+
x86_64-linux
|
66
|
+
|
67
|
+
DEPENDENCIES
|
68
|
+
pry (~> 0.14.2)
|
69
|
+
rake (~> 13.0)
|
70
|
+
rake-compiler
|
71
|
+
rb_sys
|
72
|
+
rspec (~> 3.0)
|
73
|
+
standard (~> 1.3)
|
74
|
+
tiktoken_ruby!
|
75
|
+
|
76
|
+
BUNDLED WITH
|
77
|
+
2.4.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2023 IAPark
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/tiktoken_ruby.svg)](https://badge.fury.io/rb/tiktoken_ruby)
|
2
|
+
# tiktoken_ruby
|
3
|
+
|
4
|
+
[Tiktoken](https://github.com/openai/tiktoken) is a BPE tokenizer from OpenAI used with their GPT models.
|
5
|
+
This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Install the gem and add to the application's Gemfile by executing:
|
10
|
+
|
11
|
+
$ bundle add tiktoken_ruby
|
12
|
+
|
13
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
14
|
+
|
15
|
+
$ gem install tiktoken_ruby
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
encoding = Tiktoken::Encoding.r50k_base
|
21
|
+
tokens = encoding.encode("Hello world!")
|
22
|
+
puts encoding.decode(tokens)
|
23
|
+
```
|
24
|
+
|
25
|
+
## Development
|
26
|
+
|
27
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
28
|
+
|
29
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
30
|
+
|
31
|
+
## Contributing
|
32
|
+
|
33
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/iapark/tiktoken_ruby.
|
34
|
+
|
35
|
+
## License
|
36
|
+
|
37
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rspec/core/rake_task"
|
5
|
+
require "standard/rake"
|
6
|
+
require "rake/extensiontask"
|
7
|
+
require "rb_sys/extensiontask"
|
8
|
+
|
9
|
+
GEMSPEC = Gem::Specification.load("tiktoken_ruby.gemspec")
|
10
|
+
|
11
|
+
RbSys::ExtensionTask.new("tiktoken_ruby", GEMSPEC) do |ext|
|
12
|
+
ext.lib_dir = "lib/tiktoken_ruby"
|
13
|
+
end
|
14
|
+
|
15
|
+
RSpec::Core::RakeTask.new(:spec)
|
16
|
+
|
17
|
+
|
18
|
+
task :native, [:platform] do |_t, platform:|
|
19
|
+
sh "bundle", "exec", "rb-sys-dock", "--platform", platform, "--build"
|
20
|
+
end
|
21
|
+
|
22
|
+
task build: :compile
|
23
|
+
|
24
|
+
task default: %i[compile spec standard]
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Tiktoken
  # Thin Ruby wrapper around a tokenizer object obtained from
  # Tiktoken::BpeFactory. Every instance method delegates to that object.
  #
  # Instances are normally created through class-level calls named after a
  # factory method, e.g. +Tiktoken::Encoding.r50k_base+.
  class Encoding
    class << self
      # Builds an Encoding from the Tiktoken::BpeFactory method of the same
      # name, forwarding any arguments to it.
      #
      # Only names the factory *publicly* responds to are forwarded. Anything
      # else goes to +super+ and raises the usual NoMethodError, so private
      # Kernel methods (+exit+, +abort+, +puts+, ...) are never invoked on the
      # factory by accident.
      #
      # @param method [Symbol] name of the missing class-level method
      # @return [Tiktoken::Encoding]
      # @raise [NoMethodError] when the factory has no such public method
      def method_missing(method, *args, &block)
        return super unless factory_method?(method)

        Tiktoken::Encoding.new(Tiktoken::BpeFactory.public_send(method, *args, &block))
      end

      # Keeps +respond_to?+ and +method+ consistent with +method_missing+.
      def respond_to_missing?(method, include_private = false)
        factory_method?(method) || super
      end

      private

      # True when Tiktoken::BpeFactory is loaded and publicly responds to
      # +method+. The +defined?+ guard avoids a NameError if this is queried
      # before the factory constant exists.
      def factory_method?(method)
        defined?(Tiktoken::BpeFactory) && Tiktoken::BpeFactory.respond_to?(method)
      end
    end

    # @param ext_base_bpe tokenizer object to delegate to; it must respond to
    #   +encode_ordinary+, +encode+ and +decode+
    def initialize(ext_base_bpe)
      @ext_base_bpe = ext_base_bpe
    end

    # Delegates to the wrapped tokenizer's +encode_ordinary+.
    # NOTE(review): presumably encodes without special-token handling, as in
    # upstream tiktoken; confirm against the extension.
    #
    # @param text [String]
    # @return whatever the wrapped tokenizer returns
    def encode_ordinary(text)
      @ext_base_bpe.encode_ordinary(text)
    end

    # Delegates to the wrapped tokenizer's +encode+, passing +allowed_special+
    # as its second positional argument.
    #
    # @param text [String]
    # @param allowed_special [Array] defaults to an empty list; presumably the
    #   special tokens permitted in +text+ (TODO confirm)
    # @return whatever the wrapped tokenizer returns
    def encode(text, allowed_special: [])
      @ext_base_bpe.encode(text, allowed_special)
    end

    # Delegates to the wrapped tokenizer's +decode+.
    #
    # @param tokens the value previously returned by +encode+ or
    #   +encode_ordinary+
    # @return whatever the wrapped tokenizer returns
    def decode(tokens)
      @ext_base_bpe.decode(tokens)
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "tiktoken_ruby/version"
|
4
|
+
require_relative "tiktoken_ruby/encoding.rb"
|
5
|
+
|
6
|
+
# Load the native extension. The build under the directory named after the
# running Ruby's MAJOR.MINOR version (e.g. "tiktoken_ruby/3.2/") is tried
# first; if that file cannot be loaded, fall back to the unversioned
# "tiktoken_ruby/tiktoken_ruby".
ruby_series = RUBY_VERSION[/(\d+\.\d+)/, 1]

begin
  require_relative "tiktoken_ruby/#{ruby_series}/tiktoken_ruby"
rescue LoadError
  require_relative "tiktoken_ruby/tiktoken_ruby"
end
|
12
|
+
|
13
|
+
module Tiktoken
|
14
|
+
class Error < StandardError; end
|
15
|
+
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tiktoken_ruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: x86_64-linux-musl
|
6
|
+
authors:
|
7
|
+
- IAPark
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-03-19 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Unofficial Ruby wrapper for Tiktoken by way of the unofficial rust bindings
|
14
|
+
email:
|
15
|
+
- isaac.a.park@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- ".rspec"
|
21
|
+
- ".standard.yml"
|
22
|
+
- Gemfile
|
23
|
+
- Gemfile.lock
|
24
|
+
- LICENSE.txt
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- lib/tiktoken_ruby.rb
|
28
|
+
- lib/tiktoken_ruby/2.7/tiktoken_ruby.so
|
29
|
+
- lib/tiktoken_ruby/3.0/tiktoken_ruby.so
|
30
|
+
- lib/tiktoken_ruby/3.1/tiktoken_ruby.so
|
31
|
+
- lib/tiktoken_ruby/3.2/tiktoken_ruby.so
|
32
|
+
- lib/tiktoken_ruby/encoding.rb
|
33
|
+
- lib/tiktoken_ruby/version.rb
|
34
|
+
- sig/tiktoken_ruby.rbs
|
35
|
+
homepage: https://github.com/IAPark/tiktoken_ruby
|
36
|
+
licenses:
|
37
|
+
- MIT
|
38
|
+
metadata:
|
39
|
+
homepage_uri: https://github.com/IAPark/tiktoken_ruby
|
40
|
+
source_code_uri: https://github.com/IAPark/tiktoken_ruby
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '2.7'
|
50
|
+
- - "<"
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 3.3.dev
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 3.1.0
|
58
|
+
requirements: []
|
59
|
+
rubygems_version: 3.4.4
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: Ruby wrapper for Tiktoken
|
63
|
+
test_files: []
|