tiktoken_ruby 0.0.11-x64-mingw-ucrt → 0.0.12-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +3 -0
- data/Gemfile.lock +39 -33
- data/README.md +0 -5
- data/lib/tiktoken_ruby/3.2/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/{3.1 → 3.3}/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/3.4/tiktoken_ruby.so +0 -0
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +16 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 779cae4821cc95ad89396f6b6858333913c82694c37915e55c26eeaae5cd04b1
|
4
|
+
data.tar.gz: 4cfd182bfad73e6a82c8ba2e365b439edb89c04083dc87ef5877032b2ed6da30
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7075e5111a25948d137445247289ed1a898f6cef7de1873c7e12d0d46213ceb2fae993ae80c14aca663e574379a14dad7e850b4985f878e4c2488ed3d324c752
|
7
|
+
data.tar.gz: a435626aabba541d6d1c45356e78b9f1660a9f73d1c18d1482ee200bf4c167f026e624a2a2ef354beabe7d59cf726d0dca12b83ee23a741f97272c6f809e3e47
|
data/.vscode/settings.json
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,74 +1,80 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
tiktoken_ruby (0.0.
|
5
|
-
rb_sys (
|
4
|
+
tiktoken_ruby (0.0.12)
|
5
|
+
rb_sys (~> 0.9)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
ast (2.4.
|
11
|
-
diff-lcs (1.
|
12
|
-
json (2.
|
13
|
-
language_server-protocol (3.17.0.
|
10
|
+
ast (2.4.3)
|
11
|
+
diff-lcs (1.6.2)
|
12
|
+
json (2.12.0)
|
13
|
+
language_server-protocol (3.17.0.5)
|
14
14
|
lint_roller (1.1.0)
|
15
|
-
minitest (5.
|
16
|
-
parallel (1.
|
17
|
-
parser (3.3.
|
15
|
+
minitest (5.25.5)
|
16
|
+
parallel (1.27.0)
|
17
|
+
parser (3.3.8.0)
|
18
18
|
ast (~> 2.4.1)
|
19
19
|
racc
|
20
|
+
prism (1.4.0)
|
20
21
|
racc (1.8.1)
|
21
22
|
rainbow (3.1.1)
|
22
|
-
rake (13.
|
23
|
-
rake-compiler (1.
|
23
|
+
rake (13.3.0)
|
24
|
+
rake-compiler (1.3.0)
|
24
25
|
rake
|
25
|
-
|
26
|
+
rake-compiler-dock (1.9.1)
|
27
|
+
rb_sys (0.9.117)
|
28
|
+
rake-compiler-dock (= 1.9.1)
|
26
29
|
regexp_parser (2.10.0)
|
27
|
-
rspec (3.13.
|
30
|
+
rspec (3.13.1)
|
28
31
|
rspec-core (~> 3.13.0)
|
29
32
|
rspec-expectations (~> 3.13.0)
|
30
33
|
rspec-mocks (~> 3.13.0)
|
31
|
-
rspec-core (3.13.
|
34
|
+
rspec-core (3.13.5)
|
32
35
|
rspec-support (~> 3.13.0)
|
33
|
-
rspec-expectations (3.13.
|
36
|
+
rspec-expectations (3.13.5)
|
34
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
35
38
|
rspec-support (~> 3.13.0)
|
36
|
-
rspec-mocks (3.13.
|
39
|
+
rspec-mocks (3.13.5)
|
37
40
|
diff-lcs (>= 1.2.0, < 2.0)
|
38
41
|
rspec-support (~> 3.13.0)
|
39
|
-
rspec-support (3.13.
|
40
|
-
rubocop (1.
|
42
|
+
rspec-support (3.13.5)
|
43
|
+
rubocop (1.75.7)
|
41
44
|
json (~> 2.3)
|
42
|
-
language_server-protocol (
|
45
|
+
language_server-protocol (~> 3.17.0.2)
|
46
|
+
lint_roller (~> 1.1.0)
|
43
47
|
parallel (~> 1.10)
|
44
48
|
parser (>= 3.3.0.2)
|
45
49
|
rainbow (>= 2.2.2, < 4.0)
|
46
50
|
regexp_parser (>= 2.9.3, < 3.0)
|
47
|
-
rubocop-ast (>= 1.
|
51
|
+
rubocop-ast (>= 1.44.0, < 2.0)
|
48
52
|
ruby-progressbar (~> 1.7)
|
49
53
|
unicode-display_width (>= 2.4.0, < 4.0)
|
50
|
-
rubocop-ast (1.
|
51
|
-
parser (>= 3.3.
|
52
|
-
|
53
|
-
|
54
|
-
|
54
|
+
rubocop-ast (1.44.1)
|
55
|
+
parser (>= 3.3.7.2)
|
56
|
+
prism (~> 1.4)
|
57
|
+
rubocop-performance (1.25.0)
|
58
|
+
lint_roller (~> 1.1)
|
59
|
+
rubocop (>= 1.75.0, < 2.0)
|
60
|
+
rubocop-ast (>= 1.38.0, < 2.0)
|
55
61
|
ruby-progressbar (1.13.0)
|
56
|
-
standard (1.
|
62
|
+
standard (1.50.0)
|
57
63
|
language_server-protocol (~> 3.17.0.2)
|
58
64
|
lint_roller (~> 1.0)
|
59
|
-
rubocop (~> 1.
|
65
|
+
rubocop (~> 1.75.5)
|
60
66
|
standard-custom (~> 1.0.0)
|
61
|
-
standard-performance (~> 1.
|
67
|
+
standard-performance (~> 1.8)
|
62
68
|
standard-custom (1.0.2)
|
63
69
|
lint_roller (~> 1.0)
|
64
70
|
rubocop (~> 1.50)
|
65
|
-
standard-performance (1.
|
71
|
+
standard-performance (1.8.0)
|
66
72
|
lint_roller (~> 1.1)
|
67
|
-
rubocop-performance (~> 1.
|
68
|
-
unicode-display_width (3.1.
|
73
|
+
rubocop-performance (~> 1.25.0)
|
74
|
+
unicode-display_width (3.1.4)
|
69
75
|
unicode-emoji (~> 4.0, >= 4.0.4)
|
70
76
|
unicode-emoji (4.0.4)
|
71
|
-
yard (0.9.
|
77
|
+
yard (0.9.37)
|
72
78
|
yard-doctest (0.1.17)
|
73
79
|
minitest
|
74
80
|
yard
|
@@ -89,4 +95,4 @@ DEPENDENCIES
|
|
89
95
|
yard-doctest
|
90
96
|
|
91
97
|
BUNDLED WITH
|
92
|
-
2.
|
98
|
+
2.6.9
|
data/README.md
CHANGED
@@ -5,11 +5,6 @@
|
|
5
5
|
[Tiktoken](https://github.com/openai/tiktoken) is BPE tokenizer from OpenAI used with their GPT models.
|
6
6
|
This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
|
7
7
|
|
8
|
-
## Request for maintainers
|
9
|
-
|
10
|
-
I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining ruby gems and would like to
|
11
|
-
lend a hand please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26)
|
12
|
-
|
13
8
|
## Installation
|
14
9
|
|
15
10
|
Install the gem and add to the application's Gemfile by executing:
|
Binary file
|
Binary file
|
Binary file
|
data/lib/tiktoken_ruby.rb
CHANGED
@@ -73,11 +73,17 @@ module Tiktoken
|
|
73
73
|
# https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
|
74
74
|
# is the source of the mapping for the Rust library
|
75
75
|
MODEL_TO_ENCODING_NAME = {
|
76
|
+
# reasoning
|
77
|
+
o1: "o200k_base",
|
78
|
+
o3: "o200k_base",
|
79
|
+
"o4-mini": "o200k_base",
|
76
80
|
# chat
|
77
|
-
"
|
81
|
+
"gpt-4.1": "o200k_base",
|
82
|
+
"chatgpt-4o": "o200k_base",
|
78
83
|
"gpt-4o": "o200k_base",
|
79
84
|
"gpt-4": "cl100k_base",
|
80
85
|
"gpt-3.5-turbo": "cl100k_base",
|
86
|
+
"gpt-3.5": "cl100k_base", # Common shorthand
|
81
87
|
"gpt-35-turbo": "cl100k_base", # Azure deployment name
|
82
88
|
# base
|
83
89
|
"davinci-002": "cl100k_base",
|
@@ -124,12 +130,21 @@ module Tiktoken
|
|
124
130
|
}
|
125
131
|
|
126
132
|
MODEL_PREFIX_TO_ENCODING = {
|
133
|
+
# reasoning
|
134
|
+
"o1-": "o200k_base",
|
135
|
+
"o3-": "o200k_base",
|
136
|
+
"o4-": "o200k_base",
|
127
137
|
# chat
|
138
|
+
"gpt-5-": "o200k_base",
|
139
|
+
"gpt-4.5-": "o200k_base",
|
140
|
+
"gpt-4.1-": "o200k_base",
|
141
|
+
"chatgpt-4o-": "o200k_base",
|
128
142
|
"gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
|
129
143
|
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
|
130
144
|
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
|
131
145
|
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
|
132
146
|
# fine-tuned
|
147
|
+
"ft:gpt-4o": "cl100k_base",
|
133
148
|
"ft:gpt-4": "cl100k_base",
|
134
149
|
"ft:gpt-3.5-turbo": "cl100k_base",
|
135
150
|
"ft:davinci-002": "cl100k_base",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiktoken_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.12
|
5
5
|
platform: x64-mingw-ucrt
|
6
6
|
authors:
|
7
7
|
- IAPark
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
14
14
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
@@ -29,8 +29,8 @@ files:
|
|
29
29
|
- Rakefile
|
30
30
|
- doctest_helper.rb
|
31
31
|
- lib/tiktoken_ruby.rb
|
32
|
-
- lib/tiktoken_ruby/3.1/tiktoken_ruby.so
|
33
32
|
- lib/tiktoken_ruby/3.2/tiktoken_ruby.so
|
33
|
+
- lib/tiktoken_ruby/3.3/tiktoken_ruby.so
|
34
34
|
- lib/tiktoken_ruby/3.4/tiktoken_ruby.so
|
35
35
|
- lib/tiktoken_ruby/encoding.rb
|
36
36
|
- lib/tiktoken_ruby/version.rb
|
@@ -51,7 +51,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '3.
|
54
|
+
version: '3.2'
|
55
55
|
- - "<"
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: 3.5.dev
|