tiktoken_ruby 0.0.11.1-arm-linux → 0.0.15.1-arm-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3077759be498ef740196b12c300d1111f9a38a7c9f0d7d63d7a6b6e785a789ea
4
- data.tar.gz: 6441e04eeb2c638dc663cc456d1db9decfb56ef14e143adb0731b4c39369b8b0
3
+ metadata.gz: 494d7edbd43e3f178cec789d7cd89627aa442a72085a7f34ad99ea04b89ef0a6
4
+ data.tar.gz: ae9622698383d18f32894b887423aaab2a3b79b9dc0f79f13fe9eedf0fdd49f1
5
5
  SHA512:
6
- metadata.gz: b065e21826de85f30c825aa59535ed04d16254b82d8c3fff1344cbfba7a4619974b025444b1f14bc10e470b09629f0db416b1f04ae33ab244e9feae6a7c1e847
7
- data.tar.gz: de67e9dae497859fec83f691375dfac1872ad395eb5188ea1595f99a3a9f0d1bd468d4e9f500544df1415bbfa9bd6124b33333bf477c4f489f6c9dc7b3dcaba5
6
+ metadata.gz: 6d96928e64a8d4a5306ee874707af9aae303803f690aa64f9639ab19bb3191e30316624c54e6dc8b57519c17452b7790b66eeb391505e3fa203a14e0255bd8ae
7
+ data.tar.gz: a365c641b38fb87dfa95e2045bb807954822da9a7cba8e1bece12de40de4acf6f9cf93883d8e2dc7c7489f585a546b0607d983bdf3076de219adcd6a848d4fa7
@@ -1,5 +1,11 @@
1
1
  {
2
2
  "[ruby]": {
3
3
  "editor.defaultFormatter": "Shopify.ruby-lsp"
4
+ },
5
+ "[markdown]": {
6
+ "editor.defaultFormatter": "esbenp.prettier-vscode"
7
+ },
8
+ "[github-actions-workflow]": {
9
+ "editor.defaultFormatter": "redhat.vscode-yaml"
4
10
  }
5
11
  }
data/CHANGELOG.md ADDED
@@ -0,0 +1,41 @@
1
+ # [v0.0.15.1] - 07-01-2026
2
+ ## What's Changed
3
+ * Re-release 0.0.15 for mistakenly yanked gems by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/105
4
+
5
+
6
+ **Full Changelog**: https://github.com/IAPark/tiktoken_ruby/compare/v0.0.15...v0.0.15.1
7
+ # [v0.0.15] - 06-01-2026
8
+ ## What's Changed
9
+ * No longer true by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/96
10
+ * Test build on version change by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/97
11
+ * Bump actions/cache from 4 to 5 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/98
12
+ * Bump rb-sys from 0.9.117 to 0.9.123 in the cargo group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/99
13
+ * Bump the bundler-dependencies group with 2 updates by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/100
14
+ * Add Ruby 4.0 to build matrix by @chubchenko in https://github.com/IAPark/tiktoken_ruby/pull/101
15
+ * Support Ruby 4 by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/102
16
+
17
+ ## New Contributors
18
+ * @chubchenko made their first contribution in https://github.com/IAPark/tiktoken_ruby/pull/101
19
+
20
+ **Full Changelog**: https://github.com/IAPark/tiktoken_ruby/compare/v0.0.14.1...v0.0.15
21
+ # [v0.0.14.1] - 20-12-2025
22
+ ## What's Changed
23
+ * Cut v0.0.12 by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/78
24
+ * Bump magnus from 0.8.0 to 0.8.1 in the cargo group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/83
25
+ * Bump actions/checkout from 4 to 5 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/82
26
+ * Bump standard from 1.50.0 to 1.51.1 in the bundler-dependencies group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/81
27
+ * Bump actions/upload-artifact from 4 to 5 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/84
28
+ * Bump magnus from 0.8.1 to 0.8.2 in the cargo group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/85
29
+ * Bump the bundler-dependencies group with 2 updates by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/86
30
+ * Support by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/87
31
+ * Bump actions/checkout from 5 to 6 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/89
32
+ * Bump standard from 1.51.1 to 1.52.0 in the bundler-dependencies group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/88
33
+ * release GVL while encoding / decoding tokens by @tenderworks in https://github.com/IAPark/tiktoken_ruby/pull/90
34
+ * Drop Ruby 3.1 support; automate release process by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/92
35
+ * Rewrite history by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/93
36
+ * Force workflow rebuild by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/94
37
+
38
+ ## New Contributors
39
+ * @tenderworks made their first contribution in https://github.com/IAPark/tiktoken_ruby/pull/90
40
+
41
+ **Full Changelog**: https://github.com/IAPark/tiktoken_ruby/compare/v0.0.12...v0.0.14.1
data/Gemfile.lock CHANGED
@@ -1,74 +1,81 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tiktoken_ruby (0.0.11.1)
5
- rb_sys (= 0.9.106)
4
+ tiktoken_ruby (0.0.15.1)
5
+ rb_sys (~> 0.9)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- ast (2.4.2)
11
- diff-lcs (1.5.1)
12
- json (2.9.1)
13
- language_server-protocol (3.17.0.3)
10
+ ast (2.4.3)
11
+ diff-lcs (1.6.2)
12
+ json (2.16.0)
13
+ language_server-protocol (3.17.0.5)
14
14
  lint_roller (1.1.0)
15
- minitest (5.21.2)
16
- parallel (1.26.3)
17
- parser (3.3.6.0)
15
+ minitest (6.0.1)
16
+ prism (~> 1.5)
17
+ parallel (1.27.0)
18
+ parser (3.3.10.0)
18
19
  ast (~> 2.4.1)
19
20
  racc
21
+ prism (1.6.0)
20
22
  racc (1.8.1)
21
23
  rainbow (3.1.1)
22
- rake (13.2.1)
23
- rake-compiler (1.2.9)
24
+ rake (13.3.1)
25
+ rake-compiler (1.3.1)
24
26
  rake
25
- rb_sys (0.9.106)
26
- regexp_parser (2.10.0)
27
- rspec (3.13.0)
27
+ rake-compiler-dock (1.10.0)
28
+ rb_sys (0.9.123)
29
+ rake-compiler-dock (= 1.10.0)
30
+ regexp_parser (2.11.3)
31
+ rspec (3.13.2)
28
32
  rspec-core (~> 3.13.0)
29
33
  rspec-expectations (~> 3.13.0)
30
34
  rspec-mocks (~> 3.13.0)
31
- rspec-core (3.13.2)
35
+ rspec-core (3.13.6)
32
36
  rspec-support (~> 3.13.0)
33
- rspec-expectations (3.13.3)
37
+ rspec-expectations (3.13.5)
34
38
  diff-lcs (>= 1.2.0, < 2.0)
35
39
  rspec-support (~> 3.13.0)
36
- rspec-mocks (3.13.2)
40
+ rspec-mocks (3.13.7)
37
41
  diff-lcs (>= 1.2.0, < 2.0)
38
42
  rspec-support (~> 3.13.0)
39
- rspec-support (3.13.2)
40
- rubocop (1.69.2)
43
+ rspec-support (3.13.6)
44
+ rubocop (1.81.7)
41
45
  json (~> 2.3)
42
- language_server-protocol (>= 3.17.0)
46
+ language_server-protocol (~> 3.17.0.2)
47
+ lint_roller (~> 1.1.0)
43
48
  parallel (~> 1.10)
44
49
  parser (>= 3.3.0.2)
45
50
  rainbow (>= 2.2.2, < 4.0)
46
51
  regexp_parser (>= 2.9.3, < 3.0)
47
- rubocop-ast (>= 1.36.2, < 2.0)
52
+ rubocop-ast (>= 1.47.1, < 2.0)
48
53
  ruby-progressbar (~> 1.7)
49
54
  unicode-display_width (>= 2.4.0, < 4.0)
50
- rubocop-ast (1.37.0)
51
- parser (>= 3.3.1.0)
52
- rubocop-performance (1.23.0)
53
- rubocop (>= 1.48.1, < 2.0)
54
- rubocop-ast (>= 1.31.1, < 2.0)
55
+ rubocop-ast (1.48.0)
56
+ parser (>= 3.3.7.2)
57
+ prism (~> 1.4)
58
+ rubocop-performance (1.25.0)
59
+ lint_roller (~> 1.1)
60
+ rubocop (>= 1.75.0, < 2.0)
61
+ rubocop-ast (>= 1.38.0, < 2.0)
55
62
  ruby-progressbar (1.13.0)
56
- standard (1.43.0)
63
+ standard (1.52.0)
57
64
  language_server-protocol (~> 3.17.0.2)
58
65
  lint_roller (~> 1.0)
59
- rubocop (~> 1.69.1)
66
+ rubocop (~> 1.81.7)
60
67
  standard-custom (~> 1.0.0)
61
- standard-performance (~> 1.6)
68
+ standard-performance (~> 1.8)
62
69
  standard-custom (1.0.2)
63
70
  lint_roller (~> 1.0)
64
71
  rubocop (~> 1.50)
65
- standard-performance (1.6.0)
72
+ standard-performance (1.8.0)
66
73
  lint_roller (~> 1.1)
67
- rubocop-performance (~> 1.23.0)
68
- unicode-display_width (3.1.3)
69
- unicode-emoji (~> 4.0, >= 4.0.4)
70
- unicode-emoji (4.0.4)
71
- yard (0.9.34)
74
+ rubocop-performance (~> 1.25.0)
75
+ unicode-display_width (3.2.0)
76
+ unicode-emoji (~> 4.1)
77
+ unicode-emoji (4.2.0)
78
+ yard (0.9.37)
72
79
  yard-doctest (0.1.17)
73
80
  minitest
74
81
  yard
@@ -89,4 +96,4 @@ DEPENDENCIES
89
96
  yard-doctest
90
97
 
91
98
  BUNDLED WITH
92
- 2.5.18
99
+ 2.6.9
data/README.md CHANGED
@@ -5,11 +5,6 @@
5
5
  [Tiktoken](https://github.com/openai/tiktoken) is BPE tokenizer from OpenAI used with their GPT models.
6
6
  This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
7
7
 
8
- ## Request for maintainers
9
-
10
- I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining ruby gems and would like to
11
- lend a hand please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26)
12
-
13
8
  ## Installation
14
9
 
15
10
  Install the gem and add to the application's Gemfile by executing:
@@ -41,11 +36,44 @@ enc = Tiktoken.encoding_for_model("gpt-4")
41
36
  enc.encode("hello world").length #=> 2
42
37
  ```
43
38
 
39
+ ### Encoding methods
40
+
41
+ There are three methods for encoding text:
42
+
43
+ - `encode_ordinary(text)` - Encodes text, always treating special tokens as ordinary text
44
+ - `encode(text, allowed_special: [])` - Encodes text, treating special tokens as text unless listed in `allowed_special`
45
+ - `encode_with_special_tokens(text)` - Encodes text, recognizing and parsing all special tokens
46
+
47
+ **Special tokens** are control sequences used by OpenAI models, such as `<|endoftext|>`, `<|fim_prefix|>`, `<|fim_middle|>`, and `<|fim_suffix|>`. The encoding methods differ in how they handle these sequences:
48
+
49
+ ```ruby
50
+ enc = Tiktoken.get_encoding("cl100k_base")
51
+ text = "Hello<|endoftext|>World"
52
+
53
+ # encode_ordinary: treats <|endoftext|> as literal characters (9 tokens)
54
+ enc.encode_ordinary(text)
55
+ #=> [9906, 27, 91, 8862, 728, 428, 91, 29, 10343]
56
+
57
+ # encode: same as encode_ordinary by default
58
+ enc.encode(text)
59
+ #=> [9906, 27, 91, 8862, 728, 428, 91, 29, 10343]
60
+
61
+ # encode with allowed_special: recognizes the specified special token (3 tokens)
62
+ enc.encode(text, allowed_special: ["<|endoftext|>"])
63
+ #=> [9906, 100257, 10343]
64
+
65
+ # encode_with_special_tokens: recognizes ALL special tokens (3 tokens)
66
+ enc.encode_with_special_tokens(text)
67
+ #=> [9906, 100257, 10343]
68
+ ```
69
+
70
+ All methods round-trip correctly through `decode`.
71
+
44
72
  ## Development
45
73
 
46
74
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
47
75
 
48
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
76
+ To install this gem onto your local machine, run `bundle exec rake install`.
49
77
 
50
78
  ## Contributing
51
79
 
Binary file
Binary file
Binary file
@@ -40,6 +40,13 @@ class Tiktoken::Encoding
40
40
  @ext_base_bpe.encode(text, allowed_special)
41
41
  end
42
42
 
43
+ # Encodes the text as a list of integer tokens, including special tokens.
44
+ # @param text [String] The text to encode
45
+ # @return [Array<Integer>] The encoded tokens
46
+ def encode_with_special_tokens(text)
47
+ @ext_base_bpe.encode_with_special_tokens(text)
48
+ end
49
+
43
50
  # Decodes the tokens back into text
44
51
  # @param tokens [Array<Integer>] The tokens to decode
45
52
  # @return [String] The decoded text
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tiktoken
4
- VERSION = "0.0.11.1"
4
+ VERSION = "0.0.15.1"
5
5
  end
data/lib/tiktoken_ruby.rb CHANGED
@@ -65,7 +65,8 @@ module Tiktoken
65
65
  :p50k_base,
66
66
  :p50k_edit,
67
67
  :cl100k_base,
68
- :o200k_base
68
+ :o200k_base,
69
+ :o200k_harmony
69
70
  ]
70
71
 
71
72
  # taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
@@ -73,11 +74,17 @@ module Tiktoken
73
74
  # https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
74
75
  # is the source of the mapping for the Rust library
75
76
  MODEL_TO_ENCODING_NAME = {
77
+ # reasoning
78
+ o1: "o200k_base",
79
+ o3: "o200k_base",
80
+ "o4-mini": "o200k_base",
76
81
  # chat
77
- "chatgpt-4o-latest": "o200k_base",
82
+ "gpt-4.1": "o200k_base",
83
+ "chatgpt-4o": "o200k_base",
78
84
  "gpt-4o": "o200k_base",
79
85
  "gpt-4": "cl100k_base",
80
86
  "gpt-3.5-turbo": "cl100k_base",
87
+ "gpt-3.5": "cl100k_base", # Common shorthand
81
88
  "gpt-35-turbo": "cl100k_base", # Azure deployment name
82
89
  # base
83
90
  "davinci-002": "cl100k_base",
@@ -124,12 +131,22 @@ module Tiktoken
124
131
  }
125
132
 
126
133
  MODEL_PREFIX_TO_ENCODING = {
134
+ # reasoning
135
+ "o1-": "o200k_base",
136
+ "o3-": "o200k_base",
137
+ "o4-": "o200k_base",
127
138
  # chat
139
+ "gpt-5-": "o200k_base",
140
+ "gpt-4.5-": "o200k_base",
141
+ "gpt-4.1-": "o200k_base",
142
+ "chatgpt-4o-": "o200k_base",
128
143
  "gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
129
144
  "gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
130
145
  "gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
131
146
  "gpt-35-turbo-": "cl100k_base", # Azure deployment name
147
+ "gpt-oss-": "o200k_harmony",
132
148
  # fine-tuned
149
+ "ft:gpt-4o": "cl100k_base",
133
150
  "ft:gpt-4": "cl100k_base",
134
151
  "ft:gpt-3.5-turbo": "cl100k_base",
135
152
  "ft:davinci-002": "cl100k_base",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiktoken_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11.1
4
+ version: 0.0.15.1
5
5
  platform: arm-linux
6
6
  authors:
7
7
  - IAPark
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-01-08 00:00:00.000000000 Z
11
+ date: 2026-01-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
14
14
  used by OpenAI. It can be used to count the number of tokens in text before sending
@@ -22,6 +22,7 @@ files:
22
22
  - ".rspec"
23
23
  - ".standard.yml"
24
24
  - ".vscode/settings.json"
25
+ - CHANGELOG.md
25
26
  - Gemfile
26
27
  - Gemfile.lock
27
28
  - LICENSE.txt
@@ -29,13 +30,12 @@ files:
29
30
  - Rakefile
30
31
  - doctest_helper.rb
31
32
  - lib/tiktoken_ruby.rb
32
- - lib/tiktoken_ruby/3.1/tiktoken_ruby.so
33
33
  - lib/tiktoken_ruby/3.2/tiktoken_ruby.so
34
34
  - lib/tiktoken_ruby/3.3/tiktoken_ruby.so
35
35
  - lib/tiktoken_ruby/3.4/tiktoken_ruby.so
36
+ - lib/tiktoken_ruby/4.0/tiktoken_ruby.so
36
37
  - lib/tiktoken_ruby/encoding.rb
37
38
  - lib/tiktoken_ruby/version.rb
38
- - script/release
39
39
  - sig/tiktoken_ruby.rbs
40
40
  homepage: https://github.com/IAPark/tiktoken_ruby
41
41
  licenses:
@@ -52,10 +52,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
52
52
  requirements:
53
53
  - - ">="
54
54
  - !ruby/object:Gem::Version
55
- version: '3.1'
55
+ version: '3.2'
56
56
  - - "<"
57
57
  - !ruby/object:Gem::Version
58
- version: 3.5.dev
58
+ version: 4.1.dev
59
59
  required_rubygems_version: !ruby/object:Gem::Requirement
60
60
  requirements:
61
61
  - - ">="
data/script/release DELETED
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- set -e
4
-
5
- if [ -z "${TIKTOKEN_PUBLISH_KEY}" ]; then
6
- echo "Error: TIKTOKEN_PUBLISH_KEY is not set. This is the RubyGems API key to push the gem."
7
- exit 1
8
- fi
9
-
10
- run_id=""
11
- # Parse arguments
12
- while [[ "$#" -gt 0 ]]; do
13
- case $1 in
14
- --run-id)
15
- run_id="$2"
16
- shift 2
17
- ;;
18
- *)
19
- echo "Unknown parameter passed: $1"
20
- exit 1
21
- ;;
22
- esac
23
- done
24
-
25
- if [ -z "${run_id}" ]; then
26
- echo "Error: --run-id is not provided. Please provide the GitHub Action run id for the cross-compile workflow."
27
- exit 1
28
- fi
29
-
30
- version=$(grep VERSION lib/tiktoken_ruby/version.rb | head -n 1 | cut -d'"' -f2)
31
- echo "Building tiktoken_ruby v$version, using artifacts from run $run_id"
32
-
33
- rm -rf pkg/cross-compiled
34
- gh run download "$run_id" -D pkg/cross-compiled
35
-
36
- for gem in pkg/cross-compiled/cross-gem-*/tiktoken_ruby-"$version"*.gem ; do
37
- echo "Publishing $gem"
38
- GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "$gem" --host https://rubygems.org
39
- done
40
-
41
- # last but not least, the uncompiled gem
42
- bundle exec rake package
43
- GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "pkg/tiktoken_ruby-$version.gem" --host https://rubygems.org