token_estimator 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4eb2770739cb655fed20189d0f63414c489d8d943066870b3cb6e18260d7523f
4
- data.tar.gz: 367201d551861d9fb49a398baddc3537c87fa4c929ee3a5d82ee3f7c501a3dc1
3
+ metadata.gz: e76555160bf7038e96625963f99f434979da61bab11f70c0e7293d83db8c1588
4
+ data.tar.gz: 46d04371c15c88c39d96f87223c7a0713f9ceae367c52a48b50ca59b57d417e5
5
5
  SHA512:
6
- metadata.gz: 00a175350809995a880b31b79db68fb2bfc184f4da9e180b89438959a4b39d17e01e169ca1896f97b984a36284712260d7e7f434d2bd12e3c4ee60599cb38280
7
- data.tar.gz: dae8df4734be69cea8760a642c3e764d25340cf7b27058e831c3e72fc999b3a24cc93ea3c0706cf7fe0b937f6602c63d5649a5ff3cad4bfa06e6df860b31703a
6
+ metadata.gz: 110fb3caff83e609a5b4fa9e28ff92c1b0a866d9f30b669ea72b071e36f80e96377c451581dd94fa1155280311c9fd2fbb9ab5e9516c66c6c6b507aaba4972c7
7
+ data.tar.gz: 339ca2a1c63812c424cc2d1771d4dd665151ca069965511e469dd17587764a31957f463a2c5f5222e677413c9b1117f5331c1bd2762bd4d7c96260496e586f94
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.1] - 2024-07-15
4
+
5
+ - Added the `TokenEstimator::Estimator::SUPPORTED_FILE_TYPES` constant, which lists the supported file types.
6
+
3
7
  ## [0.1.0] - 2024-07-11
4
8
 
5
9
  - Initial release
data/README.md CHANGED
@@ -72,6 +72,9 @@ Counts tokens from a JSON object.
72
72
  #### `count_tokens_from_html`
73
73
  Counts tokens from an HTML string.
74
74
 
75
+ #### `TokenEstimator::Estimator::SUPPORTED_FILE_TYPES`
76
+ Lists the supported file types as an array of file extensions (e.g. `.txt`, `.pdf`).
77
+
75
78
  ## Roadmap
76
79
  Here is a checklist of the formats we currently support for token counting and those we plan to support in the future:
77
80
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module TokenEstimator
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end
@@ -13,6 +13,8 @@ module TokenEstimator
13
13
  class UnsupportedFileTypeError < StandardError; end
14
14
 
15
15
  class Estimator
16
+ SUPPORTED_FILE_TYPES = [".txt", ".csv", ".pdf", ".json", ".md", ".html", ".xlsx"]
17
+
16
18
  def initialize(tokenizer_name)
17
19
  @tokenizer = Tokenizers.from_pretrained(tokenizer_name)
18
20
  end
@@ -95,10 +97,6 @@ module TokenEstimator
95
97
  tokens.count
96
98
  end
97
99
 
98
- def supported_file_types
99
- [".txt", ".csv", ".pdf", ".json", ".md", ".html", ".xlsx"]
100
- end
101
-
102
100
  private
103
101
 
104
102
  def extract_text_from_excel(xlsx)
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: token_estimator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - aemabit
@@ -139,6 +139,7 @@ files:
139
139
  - lib/token_estimator/version.rb
140
140
  - sig/token_estimator.rbs
141
141
  - token_estimator-0.1.0.gem
142
+ - token_estimator-0.1.1.gem
142
143
  homepage: https://github.com/aemabit/token_estimator
143
144
  licenses:
144
145
  - MIT