bx_builder_chain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +13 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +22 -0
  6. data/Gemfile.lock +120 -0
  7. data/README.md +74 -0
  8. data/Rakefile +12 -0
  9. data/bx_builder_chain.gemspec +35 -0
  10. data/lib/bx_builder_chain/chunker/recursive_text.rb +38 -0
  11. data/lib/bx_builder_chain/chunker/text.rb +38 -0
  12. data/lib/bx_builder_chain/configuration.rb +21 -0
  13. data/lib/bx_builder_chain/data.rb +28 -0
  14. data/lib/bx_builder_chain/dependency_helper.rb +22 -0
  15. data/lib/bx_builder_chain/llm/base.rb +64 -0
  16. data/lib/bx_builder_chain/llm/open_ai.rb +191 -0
  17. data/lib/bx_builder_chain/loader.rb +144 -0
  18. data/lib/bx_builder_chain/processors/base.rb +21 -0
  19. data/lib/bx_builder_chain/processors/csv.rb +27 -0
  20. data/lib/bx_builder_chain/processors/docx.rb +25 -0
  21. data/lib/bx_builder_chain/processors/html.rb +29 -0
  22. data/lib/bx_builder_chain/processors/json.rb +17 -0
  23. data/lib/bx_builder_chain/processors/pdf.rb +26 -0
  24. data/lib/bx_builder_chain/processors/text.rb +17 -0
  25. data/lib/bx_builder_chain/processors/xlsx.rb +31 -0
  26. data/lib/bx_builder_chain/utils/token_data/cl100k_base.tiktoken +100256 -0
  27. data/lib/bx_builder_chain/utils/token_length/base_validator.rb +45 -0
  28. data/lib/bx_builder_chain/utils/token_length/open_ai_validator.rb +70 -0
  29. data/lib/bx_builder_chain/utils/tokenization/byte_pair_encoding.rb +72 -0
  30. data/lib/bx_builder_chain/utils/tokenization/open_ai_encodings.rb +44 -0
  31. data/lib/bx_builder_chain/vectorsearch/base.rb +160 -0
  32. data/lib/bx_builder_chain/vectorsearch/pgvector.rb +228 -0
  33. data/lib/bx_builder_chain/version.rb +5 -0
  34. data/lib/bx_builder_chain.rb +38 -0
  35. data/lib/generators/bx_builder_chain/install_generator.rb +42 -0
  36. data/lib/generators/bx_builder_chain/templates/app/admin/bx_builder_chain_document.rb +65 -0
  37. data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/documents_controller.rb +65 -0
  38. data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/questions_controller.rb +33 -0
  39. data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/test_controller.rb +10 -0
  40. data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/document.rb +26 -0
  41. data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/document_chunk.rb +9 -0
  42. data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/embedding.rb +9 -0
  43. data/lib/generators/bx_builder_chain/templates/app/services/bx_builder_chain/document_upload_service.rb +47 -0
  44. data/lib/generators/bx_builder_chain/templates/app/services/bx_builder_chain/question_asking_service.rb +35 -0
  45. data/lib/generators/bx_builder_chain/templates/app/views/bx_builder_chain/test/form.html.erb +164 -0
  46. data/lib/generators/bx_builder_chain/templates/app/workers/bx_builder_chain/document_processor_worker.rb +32 -0
  47. data/lib/generators/bx_builder_chain/templates/initializer.rb +12 -0
  48. data/lib/generators/bx_builder_chain/templates/migration.rb +33 -0
  49. data/lib/pgvector/pg/binary_decoder/vector.rb +14 -0
  50. data/lib/pgvector/pg/text_decoder/vector.rb +12 -0
  51. data/lib/pgvector/pg.rb +10 -0
  52. data/lib/pgvector.rb +11 -0
  53. data/lib/sequel/plugins/pgvector/class_methods.rb +47 -0
  54. data/lib/sequel/plugins/pgvector/instance_methods.rb +34 -0
  55. data/lib/sequel/plugins/pgvector.rb +12 -0
  56. data/sig/bx_langchain_chat.rbs +4 -0
  57. metadata +238 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 550493404afe4cddc9999dc3071a0bc507052438409d320ee143a2711b6711a0
4
+ data.tar.gz: 75238a1638cc5970a110c14b3cdc914a9628c5c338d50317c698910a167f1a6f
5
+ SHA512:
6
+ metadata.gz: 819f64ad09e864b4f008dd60b90857140a2d853b3adc145477f0e22da235ebfc558ad8f42c32345be8621c9ea1a7d6b00b3bedf06ca4b5dac8fb28fbb2a5e279
7
+ data.tar.gz: 40903aae567fc836e12ae05f0120ff44a3bbd36b213612abf35a20e7169236c5aff58655ccd65a452297d450798bf2e8d8187e54efac58784558e359e71e1699
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,13 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.6
3
+
4
+ Style/StringLiterals:
5
+ Enabled: true
6
+ EnforcedStyle: double_quotes
7
+
8
+ Style/StringLiteralsInInterpolation:
9
+ Enabled: true
10
+ EnforcedStyle: double_quotes
11
+
12
+ Layout/LineLength:
13
+ Max: 120
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2023-08-17
4
+
5
+ - Initial release
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in bx_builder_chain.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
9
+ gem "rspec", "~> 3.0"
10
+ gem "rubocop", "~> 1.21"
11
+ gem "sequel", "~> 5.71"
12
+ gem "pg"
13
+ gem 'dotenv'
14
+ # gem "pgvector", path: '../pgvector-ruby'
15
+ gem "ruby-openai"
16
+ gem "pdf-reader"
17
+ gem "nokogiri"
18
+ gem "docx"
19
+ gem "roo"
20
+ gem "baran"
21
+
22
+ gem "pry"
data/Gemfile.lock ADDED
@@ -0,0 +1,120 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bx_builder_chain (0.1.0)
5
+ baran (= 0.1.7)
6
+ docx
7
+ dotenv
8
+ nokogiri
9
+ pdf-reader
10
+ pg
11
+ roo
12
+ ruby-openai
13
+ sequel (~> 5.71)
14
+ zeitwerk (= 2.6.11)
15
+
16
+ GEM
17
+ remote: https://rubygems.org/
18
+ specs:
19
+ Ascii85 (1.1.0)
20
+ afm (0.2.2)
21
+ ast (2.4.2)
22
+ baran (0.1.7)
23
+ coderay (1.1.3)
24
+ diff-lcs (1.5.0)
25
+ docx (0.8.0)
26
+ nokogiri (~> 1.13, >= 1.13.0)
27
+ rubyzip (~> 2.0)
28
+ dotenv (2.8.1)
29
+ faraday (2.7.10)
30
+ faraday-net_http (>= 2.0, < 3.1)
31
+ ruby2_keywords (>= 0.0.4)
32
+ faraday-multipart (1.0.4)
33
+ multipart-post (~> 2)
34
+ faraday-net_http (3.0.2)
35
+ hashery (2.1.2)
36
+ json (2.6.3)
37
+ method_source (1.0.0)
38
+ multipart-post (2.3.0)
39
+ nokogiri (1.13.10-arm64-darwin)
40
+ racc (~> 1.4)
41
+ parallel (1.23.0)
42
+ parser (3.2.2.3)
43
+ ast (~> 2.4.1)
44
+ racc
45
+ pdf-reader (2.11.0)
46
+ Ascii85 (~> 1.0)
47
+ afm (~> 0.2.1)
48
+ hashery (~> 2.0)
49
+ ruby-rc4
50
+ ttfunk
51
+ pg (1.5.3)
52
+ pry (0.14.2)
53
+ coderay (~> 1.1)
54
+ method_source (~> 1.0)
55
+ racc (1.7.1)
56
+ rainbow (3.1.1)
57
+ rake (13.0.6)
58
+ regexp_parser (2.8.1)
59
+ rexml (3.2.6)
60
+ roo (2.8.3)
61
+ nokogiri (~> 1)
62
+ rubyzip (>= 1.3.0, < 3.0.0)
63
+ rspec (3.12.0)
64
+ rspec-core (~> 3.12.0)
65
+ rspec-expectations (~> 3.12.0)
66
+ rspec-mocks (~> 3.12.0)
67
+ rspec-core (3.12.2)
68
+ rspec-support (~> 3.12.0)
69
+ rspec-expectations (3.12.3)
70
+ diff-lcs (>= 1.2.0, < 2.0)
71
+ rspec-support (~> 3.12.0)
72
+ rspec-mocks (3.12.6)
73
+ diff-lcs (>= 1.2.0, < 2.0)
74
+ rspec-support (~> 3.12.0)
75
+ rspec-support (3.12.1)
76
+ rubocop (1.50.2)
77
+ json (~> 2.3)
78
+ parallel (~> 1.10)
79
+ parser (>= 3.2.0.0)
80
+ rainbow (>= 2.2.2, < 4.0)
81
+ regexp_parser (>= 1.8, < 3.0)
82
+ rexml (>= 3.2.5, < 4.0)
83
+ rubocop-ast (>= 1.28.0, < 2.0)
84
+ ruby-progressbar (~> 1.7)
85
+ unicode-display_width (>= 2.4.0, < 3.0)
86
+ rubocop-ast (1.29.0)
87
+ parser (>= 3.2.1.0)
88
+ ruby-openai (5.0.0)
89
+ faraday (>= 1)
90
+ faraday-multipart (>= 1)
91
+ ruby-progressbar (1.13.0)
92
+ ruby-rc4 (0.1.5)
93
+ ruby2_keywords (0.0.5)
94
+ rubyzip (2.3.2)
95
+ sequel (5.71.0)
96
+ ttfunk (1.7.0)
97
+ unicode-display_width (2.4.2)
98
+ zeitwerk (2.6.11)
99
+
100
+ PLATFORMS
101
+ arm64-darwin-22
102
+
103
+ DEPENDENCIES
104
+ baran
105
+ bx_builder_chain!
106
+ docx
107
+ dotenv
108
+ nokogiri
109
+ pdf-reader
110
+ pg
111
+ pry
112
+ rake (~> 13.0)
113
+ roo
114
+ rspec (~> 3.0)
115
+ rubocop (~> 1.21)
116
+ ruby-openai
117
+ sequel (~> 5.71)
118
+
119
+ BUNDLED WITH
120
+ 2.4.7
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # BxBuilderChain
2
+
3
+ This gem / building block allows Builder apps to use OpenAI.
4
+ The following features are included:
5
+ - OpenAi completion with GPT 3.5 and GPT 4
6
+ - Q&A / additional prompt context with a user's private documents
7
+ - Documents can be stored on a global and user/group level
8
+
9
+ ## Installation
10
+
11
+ Install the gem and add to the application's Gemfile by executing:
12
+
13
+ $ bundle add bx_builder_chain
14
+
15
+ followed by
16
+
17
+ $ bundle install
18
+
19
+ If bundler is not being used to manage dependencies, install the gem by executing:
20
+
21
+ $ gem install bx_builder_chain
22
+
23
+ TODO: add rake task to create db structure
24
+ ### Optional
25
+
26
+ Generate the endpoint and ActiveAdmin controllers for Builder Chain:
27
+
28
+ $ rails generate builder_chain:endpoints
29
+
30
+ this will add the following endpoint controllers
31
+ - File upload
32
+ - OpenAi ask / completion
33
+ - ActiveAdmin documents controller
34
+
35
+ ## Usage
36
+ ```ruby
37
+ require "bx_builder_chain"
38
+ ```
39
+
40
+ Create the LLM and the vector search client:
41
+ ```ruby
42
+ llm = BxBuilderChain::Llm::OpenAi.new(api_key: 'open-ai-api-key')
43
+ client = BxBuilderChain::Vectorsearch::Pgvector.new(
44
+ url: 'postgres://postgres:password@localhost:5432/test', # postgres db url
45
+ table_name: "embeddings", # table name for the documents to be stored
46
+ llm: llm,
47
+ namespace: user_id # default is nil, nil is used for global public documents
48
+ )
49
+ ```
50
+
51
+ Add documents to the Vector Store
52
+
53
+ ```ruby
54
+ # Store plain texts in your vector search database
55
+ client.add_texts(
56
+ texts: [
57
+ "Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
58
+ "In a bowl, combine the spinach mixture with 4 ounces of softened cream cheese, 1/4 cup of grated Parmesan cheese, 1/4 cup of shredded mozzarella cheese, and 1/4 teaspoon of red pepper flakes. Mix until well combined. Stuff each chicken breast pocket with an equal amount of the spinach mixture. Seal the pocket with a toothpick if necessary. In the same skillet, heat 1 tablespoon of olive oil over medium-high heat. Add the stuffed chicken breasts and sear on each side for 3-4 minutes, or until golden brown."
59
+ ]
60
+ )
61
+ ```
62
+ ```ruby
63
+ # Store the contents of your files in your vector search database
64
+ my_pdf = "path/to/my.pdf"
65
+ my_text = "path/to/my.txt"
66
+ my_docx = "path/to/my.docx"
67
+
68
+ client.add_data(paths: [my_pdf, my_text, my_docx])
69
+ ```
70
+
71
+ Then ask the question
72
+ ```ruby
73
+ client.ask(question: "What is Frogger?")
74
+ ```
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/bx_builder_chain/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "bx_builder_chain"
7
+ spec.version = BxBuilderChain::VERSION
8
+ spec.authors = ["Paul Ketelle"]
9
+ spec.email = ["paul.ketelle@builder.ai"]
10
+
11
+ spec.summary = "Write a short summary, because RubyGems requires one."
12
+ spec.description = "Write a longer description or delete this line."
13
+ spec.license = "MIT"
14
+ spec.required_ruby_version = ">= 2.6.0"
15
+
16
+ spec.files = Dir.chdir(__dir__) do
17
+ `git ls-files -z`.split("\x0").reject do |f|
18
+ (File.expand_path(f) == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
19
+ end
20
+ end
21
+ spec.bindir = "exe"
22
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
23
+ spec.require_paths = ["lib"]
24
+
25
+ spec.add_dependency "zeitwerk", "2.6.11"
26
+ spec.add_dependency "baran", "0.1.7"
27
+ spec.add_dependency "sequel", "~> 5.71"
28
+ spec.add_dependency "pg", "~> 1.5.3"
29
+ spec.add_dependency 'dotenv', "~> 2.8"
30
+ spec.add_dependency "ruby-openai", "~> 5.1.0"
31
+ spec.add_dependency "pdf-reader", "~> 2.11.0"
32
+ spec.add_dependency "nokogiri", "~> 1.8"
33
+ spec.add_dependency "docx", "~> 0.8.0"
34
+ spec.add_dependency "roo", "~> 2.8.3"
35
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "baran"
4
+
5
+ module BxBuilderChain
6
+ module Chunker
7
+ #
8
+ # Recursive text chunker. Preferentially splits on separators.
9
+ #
10
+ # Usage:
11
+ # BxBuilderChain::Chunker::RecursiveText.new(text).chunks
12
+ #
13
+ class RecursiveText
14
+ attr_reader :text, :chunk_size, :chunk_overlap, :separators
15
+
16
+ # @param [String] text
17
+ # @param [Integer] chunk_size
18
+ # @param [Integer] chunk_overlap
19
+ # @param [Array<String>] separators
20
+ def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n", "\n", ".", " ", ""])
21
+ @text = text
22
+ @chunk_size = chunk_size
23
+ @chunk_overlap = chunk_overlap
24
+ @separators = separators
25
+ end
26
+
27
+ # @return [Array<String>]
28
+ def chunks
29
+ splitter = Baran::RecursiveCharacterTextSplitter.new(
30
+ chunk_size: chunk_size,
31
+ chunk_overlap: chunk_overlap,
32
+ separators: separators
33
+ )
34
+ splitter.chunks(text)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "baran"
4
+
5
+ module BxBuilderChain
6
+ module Chunker
7
+ #
8
+ # Simple text chunker
9
+ #
10
+ # Usage:
11
+ # BxBuilderChain::Chunker::Text.new(text).chunks
12
+ #
13
+ class Text
14
+ attr_reader :text, :chunk_size, :chunk_overlap, :separator
15
+
16
+ # @param [String] text
17
+ # @param [Integer] chunk_size
18
+ # @param [Integer] chunk_overlap
19
+ # @param [String] separator
20
+ def initialize(text, chunk_size: 1024, chunk_overlap: 64, separator: "\n\n")
21
+ @text = text
22
+ @chunk_size = chunk_size
23
+ @chunk_overlap = chunk_overlap
24
+ @separator = separator
25
+ end
26
+
27
+ # @return [Array<String>]
28
+ def chunks
29
+ splitter = Baran::CharacterTextSplitter.new(
30
+ chunk_size: chunk_size,
31
+ chunk_overlap: chunk_overlap,
32
+ separator: separator
33
+ )
34
+ splitter.chunks(text)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,21 @@
1
+ # Gem-wide configuration object (this file is `lib/bx_builder_chain/configuration.rb`)
2
+
3
+ module BxBuilderChain
4
+ class Configuration
5
+ attr_accessor :pg_url, :openai_api_key, :public_namespace, :threshold, :default_prompt_template
6
+
7
+ def initialize
8
+ @pg_url = ENV['DB_URL']
9
+ @openai_api_key = ENV['OPENAI_API_KEY']
10
+ @public_namespace = 'public'
11
+ @threshold = 0.25
12
+ @default_prompt_template = "Context information is below
13
+ --------------------
14
+ %{context}
15
+ --------------------
16
+ Given the context information and not prior knowledge
17
+ answer the question: %{question}"
18
+ end
19
+ end
20
+ end
21
+
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BxBuilderChain
4
+ # Abstraction for data loaded by a {BxBuilderChain::Loader}
5
+ class Data
6
+ # URL or Path of the data source
7
+ # @return [String]
8
+ attr_reader :source
9
+
10
+ # @param data [String] data that was loaded
11
+ # @option options [String] :source URL or Path of the data source
12
+ def initialize(data, options = {})
13
+ @source = options[:source]
14
+ @data = data
15
+ end
16
+
17
+ # @return [String]
18
+ def value
19
+ @data
20
+ end
21
+
22
+ # @param opts [Hash] options passed to the chunker
23
+ # @return [Array<String>]
24
+ def chunks(opts = {})
25
+ BxBuilderChain::Chunker::RecursiveText.new(@data, **opts).chunks
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BxBuilderChain
4
+ module DependencyHelper
5
+ class VersionError < ScriptError; end
6
+
7
+ # This method checks that the given gem is installed by looking up its gem specification; it does not `require` the gem or compare its version against the requirements listed in `bx_builder_chain.gemspec`
8
+ # This solution was built to avoid auto-loading every single gem in the Gemfile when the developer will most likely be only using a few of them.
9
+ #
10
+ # @param gem_name [String] The name of the gem to load
11
+ # @return [Boolean] Whether or not the gem was loaded successfully
12
+ # @raise [LoadError] If the gem is not installed
13
+ # @raise [VersionError] If the gem is installed, but the version does not meet the requirements
14
+ #
15
+ def depends_on(gem_name)
16
+ Gem::Specification.find_by_name(gem_name)
17
+ true
18
+ rescue LoadError
19
+ raise LoadError, "!Could not load #{gem_name}. Please ensure that the #{gem_name} gem is installed."
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BxBuilderChain::Llm
4
+ class ApiError < StandardError; end
5
+
6
+ # An LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
7
+ #
8
+ # BxBuilderChain.rb provides a common interface to interact with all supported LLMs:
9
+ #
10
+ # - {BxBuilderChain::Llm::OpenAi}
11
+ #
12
+ # @abstract
13
+ class Base
14
+ include BxBuilderChain::DependencyHelper
15
+
16
+ # A client for communicating with the LLM
17
+ attr_reader :client
18
+
19
+ def default_dimension
20
+ self.class.const_get(:DEFAULTS).dig(:dimension)
21
+ end
22
+
23
+ #
24
+ # Generate a chat completion for a given prompt. Parameters will depend on the LLM
25
+ #
26
+ # @raise NotImplementedError if not supported by the LLM
27
+ def chat(**kwargs)
28
+ raise NotImplementedError, "#{self.class.name} does not support chat"
29
+ end
30
+
31
+ #
32
+ # Generate a completion for a given prompt. Parameters will depend on the LLM.
33
+ #
34
+ # @raise NotImplementedError if not supported by the LLM
35
+ def complete(**kwargs)
36
+ raise NotImplementedError, "#{self.class.name} does not support completion"
37
+ end
38
+
39
+ #
40
+ # Generate an embedding for a given text. Parameters depend on the LLM.
41
+ #
42
+ # @raise NotImplementedError if not supported by the LLM
43
+ #
44
+ def embed(**kwargs)
45
+ raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
46
+ end
47
+
48
+ #
49
+ # Generate a summary for a given text. Parameters depend on the LLM.
50
+ #
51
+ # @raise NotImplementedError if not supported by the LLM
52
+ #
53
+ def summarize(**kwargs)
54
+ raise NotImplementedError, "#{self.class.name} does not support summarization"
55
+ end
56
+
57
+ def count_tokens(string)
58
+ tokens = string.scan(/[\w]+|[\W]/)
59
+ tokens = tokens.flat_map { |token| token.split(/(?<=[\W])$/) }
60
+
61
+ return (tokens.length*1.07).to_i
62
+ end
63
+ end
64
+ end