baran 0.1.9 → 0.1.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51599bea086ef467f089b64f9252aa87a881c1889c776219695cec4922318be8
4
- data.tar.gz: b9c20815e90bf477c8b98b3fb2580013c791dcf1a387332552bf51ae33882768
3
+ metadata.gz: f6cd3f94a89b19931ba237a67a23119a23549a70a200d6074e072d4ccd84f459
4
+ data.tar.gz: e395826b7cde638a330dfbf8f66a0864053bf96cebd0c49127706d0b1556bee6
5
5
  SHA512:
6
- metadata.gz: 8add731f1eb06baa85e7bfe495f11ce58be02b8ca6947f3f0bbfc1d7ac85cad3b693d24dc7f4f9124e5a357f784d5f42eb6875e36d7d2fb1aac09dda22dd8862
7
- data.tar.gz: 54e8924c8c2c86f524805b99cbeab0bb77a2784d72191fd35857def7450f9e9389176a63d684c4346c17787ad34bd7f3c9155b3803a6be2d0b22de24691a9ed2
6
+ metadata.gz: 421472e67d0279cac41ce4a36db642dadc3ca5b1b3f1f6cde6cb86e7d08038eec007e32b622e099fd6169a23cca2b25fd994598cfabd154901edfee74fe22ce4
7
+ data.tar.gz: 54e61bb7255e7269a06aa8df912d8a839c3c124dc4f2f73bba502070d07eafa3d04728848842b80005e4a0cfe3fcd0fb6244ee413ac1c6e283e87c2e2f21f451
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- baran (0.1.9)
4
+ baran (0.1.10)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -35,8 +35,8 @@ splitter = Baran::CharacterTextSplitter.new(
35
35
  chunk_overlap: 64,
36
36
  separator: "\n\n"
37
37
  )
38
- splitter.chunks(text)
39
- # => [{ cursor: 0, text: "..." }, ...]
38
+ splitter.chunks(text, metadata: { ... })
39
+ # => [{ cursor: 0, text: "...", metadata: { ... } }, ...]
40
40
  ```
41
41
 
42
42
  ### Recursive Character Text Splitter
@@ -47,8 +47,8 @@ Splitting by the specified characters recursively.
47
47
  splitter = Baran::RecursiveCharacterTextSplitter.new(
48
48
  separators: ["\n\n", "\n", " ", ""]
49
49
  )
50
- splitter.chunks(text)
51
- # => [{ cursor: 0, text: "..." }, ...]
50
+ splitter.chunks(text, metadata: { ... })
51
+ # => [{ cursor: 0, text: "...", metadata: { ... } }, ...]
52
52
  ```
53
53
 
54
54
  ### Markdown Text Splitter
@@ -57,8 +57,8 @@ Splitting by the Markdown descriptions.
57
57
 
58
58
  ```ruby
59
59
  splitter = Baran::MarkdownSplitter.new
60
- splitter.chunks(markdown)
61
- # => [{ cursor: 0, text: "..." }, ...]
60
+ splitter.chunks(markdown, metadata: { ... })
61
+ # => [{ cursor: 0, text: "...", metadata: { ... } }, ...]
62
62
  ```
63
63
 
64
64
  Split with the following priority.
data/lib/baran/markdown_splitter.rb CHANGED
@@ -3,7 +3,7 @@ require_relative './recursive_character_text_splitter'
3
3
  module Baran
4
4
  class MarkdownSplitter < RecursiveCharacterTextSplitter
5
5
  def initialize(chunk_size: 1024, chunk_overlap: 64)
6
- @separators = [
6
+ separators = [
7
7
  "\n# ", # h1
8
8
  "\n## ", # h2
9
9
  "\n### ", # h3
@@ -19,7 +19,7 @@ module Baran
19
19
  " ", # space
20
20
  "" # empty
21
21
  ]
22
- super(chunk_size: chunk_size, chunk_overlap: chunk_overlap)
22
+ super(chunk_size: chunk_size, chunk_overlap: chunk_overlap, separators: separators)
23
23
  end
24
24
  end
25
- end
25
+ end
data/lib/baran/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Baran
4
- VERSION = "0.1.9"
4
+ VERSION = "0.1.10"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baran
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Moeki Kawakami
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-26 00:00:00.000000000 Z
11
+ date: 2023-11-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Text Splitter for Large Language Model Datasets.
14
14
  email: