baran 0.1.7 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +3 -3
- data/lib/baran/character_text_splitter.rb +1 -1
- data/lib/baran/text_splitter.rb +6 -4
- data/lib/baran/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 51599bea086ef467f089b64f9252aa87a881c1889c776219695cec4922318be8
|
4
|
+
data.tar.gz: b9c20815e90bf477c8b98b3fb2580013c791dcf1a387332552bf51ae33882768
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8add731f1eb06baa85e7bfe495f11ce58be02b8ca6947f3f0bbfc1d7ac85cad3b693d24dc7f4f9124e5a357f784d5f42eb6875e36d7d2fb1aac09dda22dd8862
|
7
|
+
data.tar.gz: 54e8924c8c2c86f524805b99cbeab0bb77a2784d72191fd35857def7450f9e9389176a63d684c4346c17787ad34bd7f3c9155b3803a6be2d0b22de24691a9ed2
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
baran (0.1.7)
|
4
|
+
baran (0.1.9)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
-
minitest (5.
|
9
|
+
minitest (5.20.0)
|
10
10
|
rake (13.0.6)
|
11
11
|
|
12
12
|
PLATFORMS
|
@@ -15,7 +15,7 @@ PLATFORMS
|
|
15
15
|
|
16
16
|
DEPENDENCIES
|
17
17
|
baran!
|
18
|
-
minitest (~> 5.
|
18
|
+
minitest (~> 5.20)
|
19
19
|
rake (~> 13.0)
|
20
20
|
|
21
21
|
BUNDLED WITH
|
data/lib/baran/text_splitter.rb
CHANGED
@@ -14,12 +14,14 @@ module Baran
|
|
14
14
|
raise NotImplementedError, "splitted method should be implemented in a subclass"
|
15
15
|
end
|
16
16
|
|
17
|
-
def chunks(text)
|
17
|
+
def chunks(text, metadata: nil)
|
18
18
|
cursor = 0
|
19
19
|
chunks = []
|
20
20
|
|
21
|
-
splitted(text).each do |chunk|
|
22
|
-
|
21
|
+
splitted(text).compact.each do |chunk|
|
22
|
+
chunk = { text: chunk, cursor: cursor }
|
23
|
+
chunk[:metadata] = metadata if metadata
|
24
|
+
chunks << chunk
|
23
25
|
cursor += chunk.length
|
24
26
|
end
|
25
27
|
|
@@ -56,4 +58,4 @@ module Baran
|
|
56
58
|
results
|
57
59
|
end
|
58
60
|
end
|
59
|
-
end
|
61
|
+
end
|
data/lib/baran/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baran
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Moeki Kawakami
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Text Splitter for Large Language Model Datasets.
|
14
14
|
email:
|