baran 0.1.9 → 0.1.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 51599bea086ef467f089b64f9252aa87a881c1889c776219695cec4922318be8
4
- data.tar.gz: b9c20815e90bf477c8b98b3fb2580013c791dcf1a387332552bf51ae33882768
3
+ metadata.gz: 95a0c57558fc237d12ab005d24e444381725f8ddbb1fbbfe1ee730f9e14384ba
4
+ data.tar.gz: a7eacd8e62b27478df98aaaf534a6b37348696d2baa78e67c5781891c1b74c03
5
5
  SHA512:
6
- metadata.gz: 8add731f1eb06baa85e7bfe495f11ce58be02b8ca6947f3f0bbfc1d7ac85cad3b693d24dc7f4f9124e5a357f784d5f42eb6875e36d7d2fb1aac09dda22dd8862
7
- data.tar.gz: 54e8924c8c2c86f524805b99cbeab0bb77a2784d72191fd35857def7450f9e9389176a63d684c4346c17787ad34bd7f3c9155b3803a6be2d0b22de24691a9ed2
6
+ metadata.gz: f09dd858f1dee1189ee543b440e8196871129fe33b6558762aedc85a1c9a26aebb1e439ff7c9b9e52bb0d92f6b1ffff37207105a146928c9350b26ed949019cd
7
+ data.tar.gz: 59ec6d83b1b7ce85e005dee095e8baf087ffffef750851a661957b431ef346521d75661cd266cf81e889eae64966b4876b3b8031ce43987fead5c2678437aace
data/CHANGELOG.md CHANGED
@@ -8,7 +8,6 @@
8
8
 
9
9
  - Update README
10
10
 
11
-
12
11
  ## [0.1.2] - 2023-05-28
13
12
 
14
13
  - Fix README
@@ -24,4 +23,8 @@
24
23
 
25
24
  ## [0.1.5] - 2023-06-02
26
25
 
27
- - Refactor
26
+ - Refactor
27
+
28
+ ## [0.1.10] - 2024-03-09
29
+
30
+ - https://github.com/kawakamimoeki/baran/pull/17
data/Gemfile CHANGED
@@ -5,6 +5,6 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in baran.gemspec
6
6
  gemspec
7
7
 
8
- gem "minitest", "~> 5.20"
8
+ gem "minitest", "~> 5.21"
9
9
 
10
10
  gem "rake", "~> 13.0"
data/Gemfile.lock CHANGED
@@ -1,12 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- baran (0.1.9)
4
+ baran (0.1.11)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
- minitest (5.20.0)
9
+ minitest (5.21.2)
10
10
  rake (13.0.6)
11
11
 
12
12
  PLATFORMS
@@ -15,7 +15,7 @@ PLATFORMS
15
15
 
16
16
  DEPENDENCIES
17
17
  baran!
18
- minitest (~> 5.20)
18
+ minitest (~> 5.21)
19
19
  rake (~> 13.0)
20
20
 
21
21
  BUNDLED WITH
data/README.md CHANGED
@@ -35,8 +35,8 @@ splitter = Baran::CharacterTextSplitter.new(
35
35
  chunk_overlap: 64,
36
36
  separator: "\n\n"
37
37
  )
38
- splitter.chunks(text)
39
- # => [{ cursor: 0, text: "..." }, ...]
38
+ splitter.chunks(text, metadata: { ... })
39
+ # => [{ cursor: 0, text: "...", metadata: { ... } }, ...]
40
40
  ```
41
41
 
42
42
  ### Recursive Character Text Splitter
@@ -47,8 +47,8 @@ Splitting by the specified characters recursively.
47
47
  splitter = Baran::RecursiveCharacterTextSplitter.new(
48
48
  separators: ["\n\n", "\n", " ", ""]
49
49
  )
50
- splitter.chunks(text)
51
- # => [{ cursor: 0, text: "..." }, ...]
50
+ splitter.chunks(text, metadata: { ... })
51
+ # => [{ cursor: 0, text: "...", metadata: { ... } }, ...]
52
52
  ```
53
53
 
54
54
  ### Markdown Text Splitter
@@ -57,8 +57,8 @@ Splitting by the Markdown descriptions.
57
57
 
58
58
  ```ruby
59
59
  splitter = Baran::MarkdownSplitter.new
60
- splitter.chunks(markdown)
61
- # => [{ cursor: 0, text: "..." }, ...]
60
+ splitter.chunks(markdown, metadata: { ... })
61
+ # => [{ cursor: 0, text: "...", metadata: { ... } }, ...]
62
62
  ```
63
63
 
64
64
  Split with the following priority.
@@ -3,7 +3,7 @@ require_relative './recursive_character_text_splitter'
3
3
  module Baran
4
4
  class MarkdownSplitter < RecursiveCharacterTextSplitter
5
5
  def initialize(chunk_size: 1024, chunk_overlap: 64)
6
- @separators = [
6
+ separators = [
7
7
  "\n# ", # h1
8
8
  "\n## ", # h2
9
9
  "\n### ", # h3
@@ -19,7 +19,7 @@ module Baran
19
19
  " ", # space
20
20
  "" # empty
21
21
  ]
22
- super(chunk_size: chunk_size, chunk_overlap: chunk_overlap)
22
+ super(chunk_size: chunk_size, chunk_overlap: chunk_overlap, separators: separators)
23
23
  end
24
24
  end
25
- end
25
+ end
data/lib/baran/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Baran
4
- VERSION = "0.1.9"
4
+ VERSION = "0.1.11"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baran
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Moeki Kawakami
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-26 00:00:00.000000000 Z
11
+ date: 2024-03-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Text Splitter for Large Language Model Datasets.
14
14
  email: