tiktoken_ruby 0.0.11.1-arm-linux → 0.0.13-arm-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +3 -0
- data/Gemfile.lock +42 -36
- data/README.md +0 -5
- data/lib/tiktoken_ruby/3.2/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/3.3/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/3.4/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +19 -2
- metadata +3 -4
- data/lib/tiktoken_ruby/3.1/tiktoken_ruby.so +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8e22f38cbcd6b5c072547bb630fc131a56d2e8646bb4429d07f945274020f614
|
|
4
|
+
data.tar.gz: 65fa6c79c6e3a9c2332be1c48e203f0b73e380aa7d5f9847c61389546d01f67e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6c5809265a09effe5bb9b4f7e98cb399cc19f63724d5a76645f96cdda174e64c8897c79f4493c38f03d21b4ad32860df376e287d7510deb401aecca7522ebc54
|
|
7
|
+
data.tar.gz: 6eab0eb3f5b97eb79555e227d3737841777579ba4121b1b8ba137adea5ec5036228900ae1f7a4b41f905522615252a339329280c0b43bb18a96e34dd44d42441
|
data/.vscode/settings.json
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,74 +1,80 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
tiktoken_ruby (0.0.
|
|
5
|
-
rb_sys (
|
|
4
|
+
tiktoken_ruby (0.0.13)
|
|
5
|
+
rb_sys (~> 0.9)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
|
-
ast (2.4.
|
|
11
|
-
diff-lcs (1.
|
|
12
|
-
json (2.
|
|
13
|
-
language_server-protocol (3.17.0.
|
|
10
|
+
ast (2.4.3)
|
|
11
|
+
diff-lcs (1.6.2)
|
|
12
|
+
json (2.15.0)
|
|
13
|
+
language_server-protocol (3.17.0.5)
|
|
14
14
|
lint_roller (1.1.0)
|
|
15
|
-
minitest (5.
|
|
16
|
-
parallel (1.
|
|
17
|
-
parser (3.3.
|
|
15
|
+
minitest (5.25.5)
|
|
16
|
+
parallel (1.27.0)
|
|
17
|
+
parser (3.3.9.0)
|
|
18
18
|
ast (~> 2.4.1)
|
|
19
19
|
racc
|
|
20
|
+
prism (1.5.1)
|
|
20
21
|
racc (1.8.1)
|
|
21
22
|
rainbow (3.1.1)
|
|
22
|
-
rake (13.
|
|
23
|
-
rake-compiler (1.
|
|
23
|
+
rake (13.3.1)
|
|
24
|
+
rake-compiler (1.3.0)
|
|
24
25
|
rake
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
rake-compiler-dock (1.9.1)
|
|
27
|
+
rb_sys (0.9.117)
|
|
28
|
+
rake-compiler-dock (= 1.9.1)
|
|
29
|
+
regexp_parser (2.11.3)
|
|
30
|
+
rspec (3.13.2)
|
|
28
31
|
rspec-core (~> 3.13.0)
|
|
29
32
|
rspec-expectations (~> 3.13.0)
|
|
30
33
|
rspec-mocks (~> 3.13.0)
|
|
31
|
-
rspec-core (3.13.
|
|
34
|
+
rspec-core (3.13.6)
|
|
32
35
|
rspec-support (~> 3.13.0)
|
|
33
|
-
rspec-expectations (3.13.
|
|
36
|
+
rspec-expectations (3.13.5)
|
|
34
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
35
38
|
rspec-support (~> 3.13.0)
|
|
36
|
-
rspec-mocks (3.13.
|
|
39
|
+
rspec-mocks (3.13.7)
|
|
37
40
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
38
41
|
rspec-support (~> 3.13.0)
|
|
39
|
-
rspec-support (3.13.
|
|
40
|
-
rubocop (1.
|
|
42
|
+
rspec-support (3.13.6)
|
|
43
|
+
rubocop (1.80.2)
|
|
41
44
|
json (~> 2.3)
|
|
42
|
-
language_server-protocol (
|
|
45
|
+
language_server-protocol (~> 3.17.0.2)
|
|
46
|
+
lint_roller (~> 1.1.0)
|
|
43
47
|
parallel (~> 1.10)
|
|
44
48
|
parser (>= 3.3.0.2)
|
|
45
49
|
rainbow (>= 2.2.2, < 4.0)
|
|
46
50
|
regexp_parser (>= 2.9.3, < 3.0)
|
|
47
|
-
rubocop-ast (>= 1.
|
|
51
|
+
rubocop-ast (>= 1.46.0, < 2.0)
|
|
48
52
|
ruby-progressbar (~> 1.7)
|
|
49
53
|
unicode-display_width (>= 2.4.0, < 4.0)
|
|
50
|
-
rubocop-ast (1.
|
|
51
|
-
parser (>= 3.3.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
rubocop-ast (1.47.1)
|
|
55
|
+
parser (>= 3.3.7.2)
|
|
56
|
+
prism (~> 1.4)
|
|
57
|
+
rubocop-performance (1.25.0)
|
|
58
|
+
lint_roller (~> 1.1)
|
|
59
|
+
rubocop (>= 1.75.0, < 2.0)
|
|
60
|
+
rubocop-ast (>= 1.38.0, < 2.0)
|
|
55
61
|
ruby-progressbar (1.13.0)
|
|
56
|
-
standard (1.
|
|
62
|
+
standard (1.51.1)
|
|
57
63
|
language_server-protocol (~> 3.17.0.2)
|
|
58
64
|
lint_roller (~> 1.0)
|
|
59
|
-
rubocop (~> 1.
|
|
65
|
+
rubocop (~> 1.80.2)
|
|
60
66
|
standard-custom (~> 1.0.0)
|
|
61
|
-
standard-performance (~> 1.
|
|
67
|
+
standard-performance (~> 1.8)
|
|
62
68
|
standard-custom (1.0.2)
|
|
63
69
|
lint_roller (~> 1.0)
|
|
64
70
|
rubocop (~> 1.50)
|
|
65
|
-
standard-performance (1.
|
|
71
|
+
standard-performance (1.8.0)
|
|
66
72
|
lint_roller (~> 1.1)
|
|
67
|
-
rubocop-performance (~> 1.
|
|
68
|
-
unicode-display_width (3.
|
|
69
|
-
unicode-emoji (~> 4.
|
|
70
|
-
unicode-emoji (4.0
|
|
71
|
-
yard (0.9.
|
|
73
|
+
rubocop-performance (~> 1.25.0)
|
|
74
|
+
unicode-display_width (3.2.0)
|
|
75
|
+
unicode-emoji (~> 4.1)
|
|
76
|
+
unicode-emoji (4.1.0)
|
|
77
|
+
yard (0.9.37)
|
|
72
78
|
yard-doctest (0.1.17)
|
|
73
79
|
minitest
|
|
74
80
|
yard
|
|
@@ -89,4 +95,4 @@ DEPENDENCIES
|
|
|
89
95
|
yard-doctest
|
|
90
96
|
|
|
91
97
|
BUNDLED WITH
|
|
92
|
-
2.
|
|
98
|
+
2.6.9
|
data/README.md
CHANGED
|
@@ -5,11 +5,6 @@
|
|
|
5
5
|
[Tiktoken](https://github.com/openai/tiktoken) is BPE tokenizer from OpenAI used with their GPT models.
|
|
6
6
|
This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
|
|
7
7
|
|
|
8
|
-
## Request for maintainers
|
|
9
|
-
|
|
10
|
-
I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining ruby gems and would like to
|
|
11
|
-
lend a hand please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26)
|
|
12
|
-
|
|
13
8
|
## Installation
|
|
14
9
|
|
|
15
10
|
Install the gem and add to the application's Gemfile by executing:
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
data/lib/tiktoken_ruby.rb
CHANGED
|
@@ -65,7 +65,8 @@ module Tiktoken
|
|
|
65
65
|
:p50k_base,
|
|
66
66
|
:p50k_edit,
|
|
67
67
|
:cl100k_base,
|
|
68
|
-
:o200k_base
|
|
68
|
+
:o200k_base,
|
|
69
|
+
:o200k_harmony
|
|
69
70
|
]
|
|
70
71
|
|
|
71
72
|
# taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
|
|
@@ -73,11 +74,17 @@ module Tiktoken
|
|
|
73
74
|
# https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
|
|
74
75
|
# is the source of the mapping for the Rust library
|
|
75
76
|
MODEL_TO_ENCODING_NAME = {
|
|
77
|
+
# reasoning
|
|
78
|
+
o1: "o200k_base",
|
|
79
|
+
o3: "o200k_base",
|
|
80
|
+
"o4-mini": "o200k_base",
|
|
76
81
|
# chat
|
|
77
|
-
"
|
|
82
|
+
"gpt-4.1": "o200k_base",
|
|
83
|
+
"chatgpt-4o": "o200k_base",
|
|
78
84
|
"gpt-4o": "o200k_base",
|
|
79
85
|
"gpt-4": "cl100k_base",
|
|
80
86
|
"gpt-3.5-turbo": "cl100k_base",
|
|
87
|
+
"gpt-3.5": "cl100k_base", # Common shorthand
|
|
81
88
|
"gpt-35-turbo": "cl100k_base", # Azure deployment name
|
|
82
89
|
# base
|
|
83
90
|
"davinci-002": "cl100k_base",
|
|
@@ -124,12 +131,22 @@ module Tiktoken
|
|
|
124
131
|
}
|
|
125
132
|
|
|
126
133
|
MODEL_PREFIX_TO_ENCODING = {
|
|
134
|
+
# reasoning
|
|
135
|
+
"o1-": "o200k_base",
|
|
136
|
+
"o3-": "o200k_base",
|
|
137
|
+
"o4-": "o200k_base",
|
|
127
138
|
# chat
|
|
139
|
+
"gpt-5-": "o200k_base",
|
|
140
|
+
"gpt-4.5-": "o200k_base",
|
|
141
|
+
"gpt-4.1-": "o200k_base",
|
|
142
|
+
"chatgpt-4o-": "o200k_base",
|
|
128
143
|
"gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
|
|
129
144
|
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
|
|
130
145
|
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
|
|
131
146
|
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
|
|
147
|
+
"gpt-oss-": "o200k_harmony",
|
|
132
148
|
# fine-tuned
|
|
149
|
+
"ft:gpt-4o": "cl100k_base",
|
|
133
150
|
"ft:gpt-4": "cl100k_base",
|
|
134
151
|
"ft:gpt-3.5-turbo": "cl100k_base",
|
|
135
152
|
"ft:davinci-002": "cl100k_base",
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tiktoken_ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.11.1
|
|
4
|
+
version: 0.0.13
|
|
5
5
|
platform: arm-linux
|
|
6
6
|
authors:
|
|
7
7
|
- IAPark
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-11-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
|
14
14
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
|
@@ -29,7 +29,6 @@ files:
|
|
|
29
29
|
- Rakefile
|
|
30
30
|
- doctest_helper.rb
|
|
31
31
|
- lib/tiktoken_ruby.rb
|
|
32
|
-
- lib/tiktoken_ruby/3.1/tiktoken_ruby.so
|
|
33
32
|
- lib/tiktoken_ruby/3.2/tiktoken_ruby.so
|
|
34
33
|
- lib/tiktoken_ruby/3.3/tiktoken_ruby.so
|
|
35
34
|
- lib/tiktoken_ruby/3.4/tiktoken_ruby.so
|
|
@@ -52,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
52
51
|
requirements:
|
|
53
52
|
- - ">="
|
|
54
53
|
- !ruby/object:Gem::Version
|
|
55
|
-
version: '3.1'
|
|
54
|
+
version: '3.2'
|
|
56
55
|
- - "<"
|
|
57
56
|
- !ruby/object:Gem::Version
|
|
58
57
|
version: 3.5.dev
|
|
Binary file
|