token_estimator 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4eb2770739cb655fed20189d0f63414c489d8d943066870b3cb6e18260d7523f
4
- data.tar.gz: 367201d551861d9fb49a398baddc3537c87fa4c929ee3a5d82ee3f7c501a3dc1
3
+ metadata.gz: e76555160bf7038e96625963f99f434979da61bab11f70c0e7293d83db8c1588
4
+ data.tar.gz: 46d04371c15c88c39d96f87223c7a0713f9ceae367c52a48b50ca59b57d417e5
5
5
  SHA512:
6
- metadata.gz: 00a175350809995a880b31b79db68fb2bfc184f4da9e180b89438959a4b39d17e01e169ca1896f97b984a36284712260d7e7f434d2bd12e3c4ee60599cb38280
7
- data.tar.gz: dae8df4734be69cea8760a642c3e764d25340cf7b27058e831c3e72fc999b3a24cc93ea3c0706cf7fe0b937f6602c63d5649a5ff3cad4bfa06e6df860b31703a
6
+ metadata.gz: 110fb3caff83e609a5b4fa9e28ff92c1b0a866d9f30b669ea72b071e36f80e96377c451581dd94fa1155280311c9fd2fbb9ab5e9516c66c6c6b507aaba4972c7
7
+ data.tar.gz: 339ca2a1c63812c424cc2d1771d4dd665151ca069965511e469dd17587764a31957f463a2c5f5222e677413c9b1117f5331c1bd2762bd4d7c96260496e586f94
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.1] - 2024-07-15
4
+
5
+ - Added the `TokenEstimator::Estimator::SUPPORTED_FILE_TYPES` constant, which lists the supported file types.
6
+
3
7
  ## [0.1.0] - 2024-07-11
4
8
 
5
9
  - Initial release
data/README.md CHANGED
@@ -72,6 +72,9 @@ Counts tokens from a JSON object.
72
72
  #### `count_tokens_from_html`
73
73
  Counts tokens from an HTML string.
74
74
 
75
+ #### `TokenEstimator::Estimator::SUPPORTED_FILE_TYPES`
76
+ A constant holding the array of supported file extensions (e.g. `.txt`, `.csv`, `.pdf`).
77
+
75
78
  ## Roadmap
76
79
  Here is a checklist of the formats we currently support for token counting and those we plan to support in the future:
77
80
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module TokenEstimator
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end
@@ -13,6 +13,8 @@ module TokenEstimator
13
13
  class UnsupportedFileTypeError < StandardError; end
14
14
 
15
15
  class Estimator
16
+ SUPPORTED_FILE_TYPES = [".txt", ".csv", ".pdf", ".json", ".md", ".html", ".xlsx"]
17
+
16
18
  def initialize(tokenizer_name)
17
19
  @tokenizer = Tokenizers.from_pretrained(tokenizer_name)
18
20
  end
@@ -95,10 +97,6 @@ module TokenEstimator
95
97
  tokens.count
96
98
  end
97
99
 
98
- def supported_file_types
99
- [".txt", ".csv", ".pdf", ".json", ".md", ".html", ".xlsx"]
100
- end
101
-
102
100
  private
103
101
 
104
102
  def extract_text_from_excel(xlsx)
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: token_estimator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - aemabit
@@ -139,6 +139,7 @@ files:
139
139
  - lib/token_estimator/version.rb
140
140
  - sig/token_estimator.rbs
141
141
  - token_estimator-0.1.0.gem
142
+ - token_estimator-0.1.1.gem
142
143
  homepage: https://github.com/aemabit/token_estimator
143
144
  licenses:
144
145
  - MIT