tiktoken_ruby 0.0.13-x86_64-linux → 0.0.14.1-x86_64-linux

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 396dfc370a282d775b1b19e168a68ee061851022b747a40e6295e0f2e129fe7e
4
- data.tar.gz: bae3d61446f4ec11d54088779b6dd497ba7015375e4b904076f7d2eb1885068e
3
+ metadata.gz: 8bc9e92f57d58214f292fc24ef4f9f48980cbfb047853b69e385d7224b53b4a3
4
+ data.tar.gz: 976e2a3cbc94f153d90a038e35d0272fe30c1efd1c58c86c5c4210cfdfde9532
5
5
  SHA512:
6
- metadata.gz: 8ab17cc7f4c8c48a1831fd26d1ed73e29fdec920bb4793a2d89cea23ebf46f91c82a0c3752b21bb35fdba8b62b217ff5346576012075b9038c4e6b1b07ef623c
7
- data.tar.gz: d3655dda73c30fdeca86621332e5d2583578f2b508a75b343db787dff44ea3489217a53e18ef552157ef8d7e5dbcd95c8663f56c87e5e0edffc24e42eacb15ba
6
+ metadata.gz: f72a3f4d8c0b3f3c26acbe9b050feeb82910d68411162a4e94579b456906affd6a07472120edab2a227e124c0d19c5608180eaccc129708b77918b80654dcbd9
7
+ data.tar.gz: f94bafff68f9f50ae11589c62fd7aa6899836bf0ba43d070d4a7268f366392ab4736731283aae14d977ab3c53aee80eb60992c85ea81596f7cc3a36bf8c020c4
@@ -4,5 +4,8 @@
4
4
  },
5
5
  "[markdown]": {
6
6
  "editor.defaultFormatter": "esbenp.prettier-vscode"
7
+ },
8
+ "[github-actions-workflow]": {
9
+ "editor.defaultFormatter": "redhat.vscode-yaml"
7
10
  }
8
11
  }
data/CHANGELOG.md ADDED
@@ -0,0 +1,21 @@
1
+ # [v0.0.14.1] - 20-12-2025
2
+ ## What's Changed
3
+ * Cut v0.0.12 by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/78
4
+ * Bump magnus from 0.8.0 to 0.8.1 in the cargo group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/83
5
+ * Bump actions/checkout from 4 to 5 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/82
6
+ * Bump standard from 1.50.0 to 1.51.1 in the bundler-dependencies group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/81
7
+ * Bump actions/upload-artifact from 4 to 5 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/84
8
+ * Bump magnus from 0.8.1 to 0.8.2 in the cargo group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/85
9
+ * Bump the bundler-dependencies group with 2 updates by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/86
10
+ * Support by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/87
11
+ * Bump actions/checkout from 5 to 6 in the github-actions group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/89
12
+ * Bump standard from 1.51.1 to 1.52.0 in the bundler-dependencies group by @dependabot[bot] in https://github.com/IAPark/tiktoken_ruby/pull/88
13
+ * release GVL while encoding / decoding tokens by @tenderworks in https://github.com/IAPark/tiktoken_ruby/pull/90
14
+ * Drop Ruby 3.1 support; automate release process by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/92
15
+ * Rewrite history by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/93
16
+ * Force workflow rebuild by @gjtorikian in https://github.com/IAPark/tiktoken_ruby/pull/94
17
+
18
+ ## New Contributors
19
+ * @tenderworks made their first contribution in https://github.com/IAPark/tiktoken_ruby/pull/90
20
+
21
+ **Full Changelog**: https://github.com/IAPark/tiktoken_ruby/compare/v0.0.12...v0.0.14.1
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tiktoken_ruby (0.0.13)
4
+ tiktoken_ruby (0.0.14.1)
5
5
  rb_sys (~> 0.9)
6
6
 
7
7
  GEM
@@ -9,15 +9,15 @@ GEM
9
9
  specs:
10
10
  ast (2.4.3)
11
11
  diff-lcs (1.6.2)
12
- json (2.15.0)
12
+ json (2.16.0)
13
13
  language_server-protocol (3.17.0.5)
14
14
  lint_roller (1.1.0)
15
15
  minitest (5.25.5)
16
16
  parallel (1.27.0)
17
- parser (3.3.9.0)
17
+ parser (3.3.10.0)
18
18
  ast (~> 2.4.1)
19
19
  racc
20
- prism (1.5.1)
20
+ prism (1.6.0)
21
21
  racc (1.8.1)
22
22
  rainbow (3.1.1)
23
23
  rake (13.3.1)
@@ -40,7 +40,7 @@ GEM
40
40
  diff-lcs (>= 1.2.0, < 2.0)
41
41
  rspec-support (~> 3.13.0)
42
42
  rspec-support (3.13.6)
43
- rubocop (1.80.2)
43
+ rubocop (1.81.7)
44
44
  json (~> 2.3)
45
45
  language_server-protocol (~> 3.17.0.2)
46
46
  lint_roller (~> 1.1.0)
@@ -48,10 +48,10 @@ GEM
48
48
  parser (>= 3.3.0.2)
49
49
  rainbow (>= 2.2.2, < 4.0)
50
50
  regexp_parser (>= 2.9.3, < 3.0)
51
- rubocop-ast (>= 1.46.0, < 2.0)
51
+ rubocop-ast (>= 1.47.1, < 2.0)
52
52
  ruby-progressbar (~> 1.7)
53
53
  unicode-display_width (>= 2.4.0, < 4.0)
54
- rubocop-ast (1.47.1)
54
+ rubocop-ast (1.48.0)
55
55
  parser (>= 3.3.7.2)
56
56
  prism (~> 1.4)
57
57
  rubocop-performance (1.25.0)
@@ -59,10 +59,10 @@ GEM
59
59
  rubocop (>= 1.75.0, < 2.0)
60
60
  rubocop-ast (>= 1.38.0, < 2.0)
61
61
  ruby-progressbar (1.13.0)
62
- standard (1.51.1)
62
+ standard (1.52.0)
63
63
  language_server-protocol (~> 3.17.0.2)
64
64
  lint_roller (~> 1.0)
65
- rubocop (~> 1.80.2)
65
+ rubocop (~> 1.81.7)
66
66
  standard-custom (~> 1.0.0)
67
67
  standard-performance (~> 1.8)
68
68
  standard-custom (1.0.2)
data/README.md CHANGED
@@ -36,6 +36,39 @@ enc = Tiktoken.encoding_for_model("gpt-4")
36
36
  enc.encode("hello world").length #=> 2
37
37
  ```
38
38
 
39
+ ### Encoding methods
40
+
41
+ There are three methods for encoding text:
42
+
43
+ - `encode_ordinary(text)` - Encodes text, always treating special tokens as ordinary text
44
+ - `encode(text, allowed_special: [])` - Encodes text, treating special tokens as text unless listed in `allowed_special`
45
+ - `encode_with_special_tokens(text)` - Encodes text, recognizing and parsing all special tokens
46
+
47
+ **Special tokens** are control sequences used by OpenAI models, such as `<|endoftext|>`, `<|fim_prefix|>`, `<|fim_middle|>`, and `<|fim_suffix|>`. The encoding methods differ in how they handle these sequences:
48
+
49
+ ```ruby
50
+ enc = Tiktoken.get_encoding("cl100k_base")
51
+ text = "Hello<|endoftext|>World"
52
+
53
+ # encode_ordinary: treats <|endoftext|> as literal characters (9 tokens)
54
+ enc.encode_ordinary(text)
55
+ #=> [9906, 27, 91, 8862, 728, 428, 91, 29, 10343]
56
+
57
+ # encode: same as encode_ordinary by default
58
+ enc.encode(text)
59
+ #=> [9906, 27, 91, 8862, 728, 428, 91, 29, 10343]
60
+
61
+ # encode with allowed_special: recognizes the specified special token (3 tokens)
62
+ enc.encode(text, allowed_special: ["<|endoftext|>"])
63
+ #=> [9906, 100257, 10343]
64
+
65
+ # encode_with_special_tokens: recognizes ALL special tokens (3 tokens)
66
+ enc.encode_with_special_tokens(text)
67
+ #=> [9906, 100257, 10343]
68
+ ```
69
+
70
+ All methods round-trip correctly through `decode`.
71
+
39
72
  ## Development
40
73
 
41
74
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
Binary file
Binary file
Binary file
@@ -40,6 +40,13 @@ class Tiktoken::Encoding
40
40
  @ext_base_bpe.encode(text, allowed_special)
41
41
  end
42
42
 
43
+ # Encodes the text as a list of integer tokens, recognizing all special tokens in the text instead of treating them as ordinary text.
44
+ # @param text [String] The text to encode
45
+ # @return [Array<Integer>] The encoded tokens
46
+ def encode_with_special_tokens(text)
47
+ @ext_base_bpe.encode_with_special_tokens(text)
48
+ end
49
+
43
50
  # Decodes the tokens back into text
44
51
  # @param tokens [Array<Integer>] The tokens to decode
45
52
  # @return [String] The decoded text
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tiktoken
4
- VERSION = "0.0.13"
4
+ VERSION = "0.0.14.1"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiktoken_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14.1
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - IAPark
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-11-10 00:00:00.000000000 Z
11
+ date: 2025-12-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
14
14
  used by OpenAI. It can be used to count the number of tokens in text before sending
@@ -22,6 +22,7 @@ files:
22
22
  - ".rspec"
23
23
  - ".standard.yml"
24
24
  - ".vscode/settings.json"
25
+ - CHANGELOG.md
25
26
  - Gemfile
26
27
  - Gemfile.lock
27
28
  - LICENSE.txt
@@ -34,7 +35,6 @@ files:
34
35
  - lib/tiktoken_ruby/3.4/tiktoken_ruby.so
35
36
  - lib/tiktoken_ruby/encoding.rb
36
37
  - lib/tiktoken_ruby/version.rb
37
- - script/release
38
38
  - sig/tiktoken_ruby.rbs
39
39
  homepage: https://github.com/IAPark/tiktoken_ruby
40
40
  licenses:
data/script/release DELETED
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env bash
2
-
3
- set -e
4
-
5
- if [ -z "${TIKTOKEN_PUBLISH_KEY}" ]; then
6
- echo "Error: TIKTOKEN_PUBLISH_KEY is not set. This is the RubyGems API key to push the gem."
7
- exit 1
8
- fi
9
-
10
- run_id=""
11
- # Parse arguments
12
- while [[ "$#" -gt 0 ]]; do
13
- case $1 in
14
- --run-id)
15
- run_id="$2"
16
- shift 2
17
- ;;
18
- *)
19
- echo "Unknown parameter passed: $1"
20
- exit 1
21
- ;;
22
- esac
23
- done
24
-
25
- if [ -z "${run_id}" ]; then
26
- echo "Error: --run-id is not provided. Please provide the GitHub Action run id for the cross-compile workflow."
27
- exit 1
28
- fi
29
-
30
- version=$(grep VERSION lib/tiktoken_ruby/version.rb | head -n 1 | cut -d'"' -f2)
31
- echo "Building tiktoken_ruby v$version, using artifacts from run $run_id"
32
-
33
- rm -rf pkg/cross-compiled
34
- gh run download "$run_id" -D pkg/cross-compiled
35
-
36
- for gem in pkg/cross-compiled/cross-gem-*/tiktoken_ruby-"$version"*.gem ; do
37
- echo "Publishing $gem"
38
- GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "$gem" --host https://rubygems.org
39
- done
40
-
41
- # last but not least, the uncompiled gem
42
- bundle exec rake package
43
- GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "pkg/tiktoken_ruby-$version.gem" --host https://rubygems.org