chunker-ruby 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +235 -0
- data/lib/chunker_ruby/base_splitter.rb +28 -19
- data/lib/chunker_ruby/chunk.rb +10 -0
- data/lib/chunker_ruby/json_splitter.rb +23 -2
- data/lib/chunker_ruby/rails/chunkable.rb +3 -0
- data/lib/chunker_ruby/semantic.rb +9 -3
- data/lib/chunker_ruby/token.rb +7 -3
- data/lib/chunker_ruby/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2ef1a60bf60351dc527abc378d992bfa05b0de0d9c64af3db4edbb63b9539c61
|
|
4
|
+
data.tar.gz: 01452e12091762a1dee9e86b2613e525a3dd9536cd51fd5d52da19e4a4f829dd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a4b276bd94c9c0e7c6749223eecf8e78aa578f65866a4ab98f0d53e6245fc29ff622e6d8d262d3937c22b1f9e35e23f875c3ca52d5fc188138090c251ce1de29
|
|
7
|
+
data.tar.gz: d4c86c9423f92c20526a4f771c61e95f24edc7a11bf6920189d0eb408213c49e6f9828e02b1c6cb25121953a2f352a9b8c9f9e46db42af0ec5eca98b71820a2a
|
data/README.md
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# chunker-ruby
|
|
2
|
+
|
|
3
|
+
Text chunking/splitting library for Ruby, designed for RAG (Retrieval-Augmented Generation) pipelines. Split documents into optimal pieces for embedding and vector search.
|
|
4
|
+
|
|
5
|
+
Bad chunking = bad retrieval = bad RAG. This gem solves that.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```sh
|
|
10
|
+
gem install chunker-ruby
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or add to your Gemfile:
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
gem "chunker-ruby"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
require "chunker_ruby"
|
|
23
|
+
|
|
24
|
+
text = File.read("long_document.md")
|
|
25
|
+
|
|
26
|
+
# Simple split (uses RecursiveCharacter by default)
|
|
27
|
+
chunks = ChunkerRuby.split(text, chunk_size: 1000, chunk_overlap: 200)
|
|
28
|
+
|
|
29
|
+
chunks.each do |chunk|
|
|
30
|
+
chunk.text # => "The document begins..."
|
|
31
|
+
chunk.index # => 0
|
|
32
|
+
chunk.offset # => 0 (character offset in original)
|
|
33
|
+
chunk.length # => 342
|
|
34
|
+
chunk.metadata # => {}
|
|
35
|
+
end
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Strategies
|
|
39
|
+
|
|
40
|
+
### Character
|
|
41
|
+
|
|
42
|
+
Fixed character count with overlap. Simplest strategy.
|
|
43
|
+
|
|
44
|
+
```ruby
|
|
45
|
+
chunker = ChunkerRuby::Character.new(chunk_size: 1000, chunk_overlap: 200)
|
|
46
|
+
chunks = chunker.split(text)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### RecursiveCharacter
|
|
50
|
+
|
|
51
|
+
Tries splitting by paragraph, then sentence, then word, then character. The most generally useful strategy.
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
chunker = ChunkerRuby::RecursiveCharacter.new(
|
|
55
|
+
chunk_size: 1000,
|
|
56
|
+
chunk_overlap: 200,
|
|
57
|
+
separators: ["\n\n", "\n", ". ", ", ", " ", ""] # default
|
|
58
|
+
)
|
|
59
|
+
chunks = chunker.split(text)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Sentence
|
|
63
|
+
|
|
64
|
+
Splits on sentence boundaries. Handles abbreviations (Dr., Mr., etc.) and decimal numbers.
|
|
65
|
+
|
|
66
|
+
```ruby
|
|
67
|
+
chunker = ChunkerRuby::Sentence.new(
|
|
68
|
+
min_chunk_size: 500,
|
|
69
|
+
max_chunk_size: 1500
|
|
70
|
+
)
|
|
71
|
+
chunks = chunker.split(text)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Separator
|
|
75
|
+
|
|
76
|
+
Split on a specific string or regex.
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
chunker = ChunkerRuby::Separator.new(
|
|
80
|
+
separator: "\n\n", # or a Regexp
|
|
81
|
+
keep_separator: true,
|
|
82
|
+
chunk_size: 1000
|
|
83
|
+
)
|
|
84
|
+
chunks = chunker.split(text)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Markdown
|
|
88
|
+
|
|
89
|
+
Splits on markdown headers (h1-h6). Respects code blocks. Preserves header hierarchy in metadata.
|
|
90
|
+
|
|
91
|
+
```ruby
|
|
92
|
+
chunker = ChunkerRuby::Markdown.new(chunk_size: 1000, chunk_overlap: 100)
|
|
93
|
+
chunks = chunker.split(markdown_text)
|
|
94
|
+
|
|
95
|
+
chunks.first.metadata[:headers] # => ["# Introduction", "## Background"]
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### HTML
|
|
99
|
+
|
|
100
|
+
Splits on HTML block tags. Optionally strips tags.
|
|
101
|
+
|
|
102
|
+
```ruby
|
|
103
|
+
chunker = ChunkerRuby::HTML.new(chunk_size: 1000, strip_tags: true)
|
|
104
|
+
chunks = chunker.split(html_text)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Code
|
|
108
|
+
|
|
109
|
+
Splits on function/class/method boundaries. Supports Ruby, Python, JavaScript, and TypeScript.
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
chunker = ChunkerRuby::Code.new(language: :ruby, chunk_size: 1500)
|
|
113
|
+
chunks = chunker.split(source_code)
|
|
114
|
+
|
|
115
|
+
chunks.first.metadata[:language] # => :ruby
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### JSON
|
|
119
|
+
|
|
120
|
+
Splits JSON arrays/objects into chunks. Each chunk is valid JSON.
|
|
121
|
+
|
|
122
|
+
```ruby
|
|
123
|
+
chunker = ChunkerRuby::JSONSplitter.new(chunk_size: 1000, chunk_overlap: 0)
|
|
124
|
+
chunks = chunker.split(json_string)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Token
|
|
128
|
+
|
|
129
|
+
Splits by token count. Uses `tokenizer-ruby` if available; otherwise falls back to character-based estimation (~4 chars/token).
|
|
130
|
+
|
|
131
|
+
```ruby
|
|
132
|
+
chunker = ChunkerRuby::Token.new(
|
|
133
|
+
chunk_size: 512, # in tokens
|
|
134
|
+
chunk_overlap: 50,
|
|
135
|
+
tokenizer: "gpt2"
|
|
136
|
+
)
|
|
137
|
+
chunks = chunker.split(text)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Semantic
|
|
141
|
+
|
|
142
|
+
Splits where embedding similarity drops (topic boundaries). Requires an embedding function.
|
|
143
|
+
|
|
144
|
+
```ruby
|
|
145
|
+
chunker = ChunkerRuby::Semantic.new(
|
|
146
|
+
embed: ->(text) { my_embedding_function(text) },
|
|
147
|
+
threshold: 0.5,
|
|
148
|
+
min_chunk_size: 100,
|
|
149
|
+
max_chunk_size: 2000
|
|
150
|
+
)
|
|
151
|
+
chunks = chunker.split(text)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Sliding Window
|
|
155
|
+
|
|
156
|
+
Fixed-size sliding window with configurable stride.
|
|
157
|
+
|
|
158
|
+
```ruby
|
|
159
|
+
chunker = ChunkerRuby::SlidingWindow.new(
|
|
160
|
+
chunk_size: 500,
|
|
161
|
+
chunk_overlap: 100,
|
|
162
|
+
stride: 200 # optional, defaults to chunk_size - chunk_overlap
|
|
163
|
+
)
|
|
164
|
+
chunks = chunker.split(text)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Chunk Object
|
|
168
|
+
|
|
169
|
+
Every strategy returns an array of `ChunkerRuby::Chunk` objects:
|
|
170
|
+
|
|
171
|
+
```ruby
|
|
172
|
+
chunk.text # chunk content
|
|
173
|
+
chunk.index # position in sequence (0, 1, 2, ...)
|
|
174
|
+
chunk.offset # character offset in original document
|
|
175
|
+
chunk.length # character length
|
|
176
|
+
chunk.metadata # arbitrary metadata hash
|
|
177
|
+
chunk.token_count # estimated token count (or exact with tokenizer)
|
|
178
|
+
chunk.to_h # { text:, index:, offset:, length:, metadata: }
|
|
179
|
+
chunk.to_s # same as chunk.text
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Splitting Multiple Documents
|
|
183
|
+
|
|
184
|
+
```ruby
|
|
185
|
+
splitter = ChunkerRuby::RecursiveCharacter.new(chunk_size: 1000)
|
|
186
|
+
chunks = splitter.split_many(["First document...", "Second document..."])
|
|
187
|
+
|
|
188
|
+
chunks.first.metadata[:doc_index] # => 0
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Rails Integration
|
|
192
|
+
|
|
193
|
+
```ruby
|
|
194
|
+
class Document < ApplicationRecord
|
|
195
|
+
include ChunkerRuby::Rails::Chunkable
|
|
196
|
+
|
|
197
|
+
chunkable :content,
|
|
198
|
+
strategy: :markdown,
|
|
199
|
+
chunk_size: 1000,
|
|
200
|
+
chunk_overlap: 200
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
document = Document.create!(content: long_text)
|
|
204
|
+
document.chunks # => [#<DocumentChunk text="..." chunk_index=0>, ...]
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
Requires a `DocumentChunk` model with `text`, `chunk_index`, `offset`, and `metadata` columns.
|
|
208
|
+
|
|
209
|
+
## Choosing a Strategy
|
|
210
|
+
|
|
211
|
+
| Use Case | Recommended Strategy |
|
|
212
|
+
|---|---|
|
|
213
|
+
| General text | `RecursiveCharacter` |
|
|
214
|
+
| Markdown docs | `Markdown` |
|
|
215
|
+
| Source code | `Code` |
|
|
216
|
+
| HTML pages | `HTML` |
|
|
217
|
+
| LLM context window management | `Token` |
|
|
218
|
+
| Topic-based splitting | `Semantic` |
|
|
219
|
+
| Simple fixed-size | `Character` or `SlidingWindow` |
|
|
220
|
+
|
|
221
|
+
## Chunk Size Guidelines
|
|
222
|
+
|
|
223
|
+
- **256-512 tokens**: Precise, fact-based retrieval (FAQ, definitions)
|
|
224
|
+
- **512-1024 tokens**: Good balance for most use cases (docs, articles)
|
|
225
|
+
- **1024-2048 tokens**: Complex topics needing more context (tutorials, guides)
|
|
226
|
+
- **10-20% overlap**: Prevents context loss at boundaries
|
|
227
|
+
|
|
228
|
+
## Dependencies
|
|
229
|
+
|
|
230
|
+
- **Runtime**: None (pure Ruby)
|
|
231
|
+
- **Optional**: `tokenizer-ruby` for token-based chunking
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
MIT
|
|
@@ -25,6 +25,31 @@ module ChunkerRuby
|
|
|
25
25
|
|
|
26
26
|
def build_chunks(pieces, original_text, metadata: {})
|
|
27
27
|
chunks = []
|
|
28
|
+
current_pos = 0
|
|
29
|
+
|
|
30
|
+
merged = merge_pieces(pieces)
|
|
31
|
+
|
|
32
|
+
merged.each do |chunk_text|
|
|
33
|
+
next if chunk_text.strip.empty?
|
|
34
|
+
|
|
35
|
+
# Find the actual position starting from current_pos
|
|
36
|
+
offset = original_text.index(chunk_text, current_pos) || current_pos
|
|
37
|
+
|
|
38
|
+
chunks << Chunk.new(
|
|
39
|
+
text: chunk_text,
|
|
40
|
+
index: chunks.size,
|
|
41
|
+
offset: offset,
|
|
42
|
+
metadata: metadata.dup
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
current_pos = offset + chunk_text.length
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
chunks
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def merge_pieces(pieces)
|
|
52
|
+
merged = []
|
|
28
53
|
current_parts = []
|
|
29
54
|
current_length = 0
|
|
30
55
|
|
|
@@ -32,14 +57,7 @@ module ChunkerRuby
|
|
|
32
57
|
piece_len = piece.length
|
|
33
58
|
|
|
34
59
|
if current_length + piece_len > @chunk_size && !current_parts.empty?
|
|
35
|
-
|
|
36
|
-
offset = original_text.index(chunk_text) || 0
|
|
37
|
-
chunks << Chunk.new(
|
|
38
|
-
text: chunk_text,
|
|
39
|
-
index: chunks.size,
|
|
40
|
-
offset: offset,
|
|
41
|
-
metadata: metadata.dup
|
|
42
|
-
)
|
|
60
|
+
merged << current_parts.join
|
|
43
61
|
|
|
44
62
|
# Handle overlap: keep trailing parts that fit within overlap size
|
|
45
63
|
overlap_parts = []
|
|
@@ -61,18 +79,9 @@ module ChunkerRuby
|
|
|
61
79
|
current_length += piece_len
|
|
62
80
|
end
|
|
63
81
|
|
|
64
|
-
unless current_parts.empty?
|
|
65
|
-
chunk_text = current_parts.join
|
|
66
|
-
offset = original_text.rindex(chunk_text) || 0
|
|
67
|
-
chunks << Chunk.new(
|
|
68
|
-
text: chunk_text,
|
|
69
|
-
index: chunks.size,
|
|
70
|
-
offset: offset,
|
|
71
|
-
metadata: metadata.dup
|
|
72
|
-
)
|
|
73
|
-
end
|
|
82
|
+
merged << current_parts.join unless current_parts.empty?
|
|
74
83
|
|
|
75
|
-
|
|
84
|
+
merged
|
|
76
85
|
end
|
|
77
86
|
end
|
|
78
87
|
end
|
data/lib/chunker_ruby/chunk.rb
CHANGED
|
@@ -29,6 +29,16 @@ module ChunkerRuby
|
|
|
29
29
|
{ text: @text, index: @index, offset: @offset, length: @length, metadata: @metadata }
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
+
def valid?(original_text = nil)
|
|
33
|
+
return false if text.nil? || text.empty?
|
|
34
|
+
return false if offset.negative?
|
|
35
|
+
return false if index.negative?
|
|
36
|
+
if original_text
|
|
37
|
+
return false unless original_text[offset, text.length] == text
|
|
38
|
+
end
|
|
39
|
+
true
|
|
40
|
+
end
|
|
41
|
+
|
|
32
42
|
def ==(other)
|
|
33
43
|
other.is_a?(Chunk) && text == other.text && index == other.index && offset == other.offset
|
|
34
44
|
end
|
|
@@ -10,6 +10,7 @@ module ChunkerRuby
|
|
|
10
10
|
parsed = ::JSON.parse(text)
|
|
11
11
|
pieces = extract_pieces(parsed)
|
|
12
12
|
chunks = []
|
|
13
|
+
current_pos = 0
|
|
13
14
|
|
|
14
15
|
current_parts = []
|
|
15
16
|
current_length = 0
|
|
@@ -19,10 +20,13 @@ module ChunkerRuby
|
|
|
19
20
|
|
|
20
21
|
if current_length + json_str.length > @chunk_size && !current_parts.empty?
|
|
21
22
|
chunk_text = ::JSON.generate(current_parts.length == 1 ? current_parts.first : current_parts)
|
|
23
|
+
# Search for a key or value from the first piece to approximate offset
|
|
24
|
+
offset = find_json_offset(text, current_parts.first, current_pos)
|
|
25
|
+
current_pos = offset + chunk_text.length
|
|
22
26
|
chunks << Chunk.new(
|
|
23
27
|
text: chunk_text,
|
|
24
28
|
index: chunks.size,
|
|
25
|
-
offset:
|
|
29
|
+
offset: offset,
|
|
26
30
|
metadata: metadata.dup
|
|
27
31
|
)
|
|
28
32
|
current_parts = []
|
|
@@ -35,10 +39,11 @@ module ChunkerRuby
|
|
|
35
39
|
|
|
36
40
|
unless current_parts.empty?
|
|
37
41
|
chunk_text = ::JSON.generate(current_parts.length == 1 ? current_parts.first : current_parts)
|
|
42
|
+
offset = find_json_offset(text, current_parts.first, current_pos)
|
|
38
43
|
chunks << Chunk.new(
|
|
39
44
|
text: chunk_text,
|
|
40
45
|
index: chunks.size,
|
|
41
|
-
offset:
|
|
46
|
+
offset: offset,
|
|
42
47
|
metadata: metadata.dup
|
|
43
48
|
)
|
|
44
49
|
end
|
|
@@ -48,6 +53,22 @@ module ChunkerRuby
|
|
|
48
53
|
|
|
49
54
|
private
|
|
50
55
|
|
|
56
|
+
def find_json_offset(text, first_piece, current_pos)
|
|
57
|
+
# Try to find a recognizable key or value from the first piece in the original text
|
|
58
|
+
search_str = case first_piece
|
|
59
|
+
when Hash
|
|
60
|
+
first_piece.keys.first.to_s
|
|
61
|
+
when String
|
|
62
|
+
first_piece
|
|
63
|
+
else
|
|
64
|
+
first_piece.to_s
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Search for the key/value string as it would appear in JSON (quoted)
|
|
68
|
+
quoted = "\"#{search_str}\""
|
|
69
|
+
text.index(quoted, current_pos) || text.index(search_str, current_pos) || current_pos
|
|
70
|
+
end
|
|
71
|
+
|
|
51
72
|
def extract_pieces(parsed)
|
|
52
73
|
case parsed
|
|
53
74
|
when Array
|
|
@@ -62,6 +62,9 @@ module ChunkerRuby
|
|
|
62
62
|
when :html then ChunkerRuby::HTML
|
|
63
63
|
when :code then ChunkerRuby::Code
|
|
64
64
|
when :token then ChunkerRuby::Token
|
|
65
|
+
when :semantic then ChunkerRuby::Semantic
|
|
66
|
+
when :json then ChunkerRuby::JSONSplitter
|
|
67
|
+
when :sliding_window then ChunkerRuby::SlidingWindow
|
|
65
68
|
else raise ArgumentError, "Unknown chunking strategy: #{strategy}"
|
|
66
69
|
end
|
|
67
70
|
end
|
|
@@ -48,6 +48,7 @@ module ChunkerRuby
|
|
|
48
48
|
|
|
49
49
|
def build_semantic_chunks(sentences, split_points, original_text, metadata)
|
|
50
50
|
chunks = []
|
|
51
|
+
current_pos = 0
|
|
51
52
|
boundaries = [-1] + split_points + [sentences.length - 1]
|
|
52
53
|
|
|
53
54
|
(0...boundaries.length - 1).each do |i|
|
|
@@ -64,15 +65,18 @@ module ChunkerRuby
|
|
|
64
65
|
)
|
|
65
66
|
sub_chunks = sub_splitter.split(chunk_text, metadata: metadata)
|
|
66
67
|
sub_chunks.each do |sc|
|
|
68
|
+
offset = original_text.index(sc.text, current_pos) || current_pos
|
|
69
|
+
current_pos = offset + sc.text.length
|
|
67
70
|
chunks << Chunk.new(
|
|
68
71
|
text: sc.text,
|
|
69
72
|
index: chunks.size,
|
|
70
|
-
offset:
|
|
73
|
+
offset: offset,
|
|
71
74
|
metadata: sc.metadata
|
|
72
75
|
)
|
|
73
76
|
end
|
|
74
77
|
elsif chunk_text.length >= @min_chunk_size
|
|
75
|
-
offset = original_text.index(chunk_text) || 0
|
|
78
|
+
offset = original_text.index(chunk_text, current_pos) || current_pos
|
|
79
|
+
current_pos = offset + chunk_text.length
|
|
76
80
|
chunks << Chunk.new(
|
|
77
81
|
text: chunk_text,
|
|
78
82
|
index: chunks.size,
|
|
@@ -89,8 +93,10 @@ module ChunkerRuby
|
|
|
89
93
|
offset: prev.offset,
|
|
90
94
|
metadata: prev.metadata
|
|
91
95
|
)
|
|
96
|
+
current_pos = prev.offset + merged.length
|
|
92
97
|
else
|
|
93
|
-
offset = original_text.index(chunk_text) || 0
|
|
98
|
+
offset = original_text.index(chunk_text, current_pos) || current_pos
|
|
99
|
+
current_pos = offset + chunk_text.length
|
|
94
100
|
chunks << Chunk.new(
|
|
95
101
|
text: chunk_text,
|
|
96
102
|
index: chunks.size,
|
data/lib/chunker_ruby/token.rb
CHANGED
|
@@ -45,15 +45,19 @@ module ChunkerRuby
|
|
|
45
45
|
tokens = @tokenizer.encode(text)
|
|
46
46
|
chunks = []
|
|
47
47
|
start = 0
|
|
48
|
+
current_pos = 0
|
|
48
49
|
|
|
49
50
|
while start < tokens.length
|
|
50
51
|
end_pos = [start + @chunk_size, tokens.length].min
|
|
51
52
|
chunk_tokens = tokens[start...end_pos]
|
|
52
|
-
|
|
53
|
+
raw_text = @tokenizer.decode(chunk_tokens)
|
|
54
|
+
stripped = raw_text.strip
|
|
55
|
+
|
|
56
|
+
offset = text.index(stripped, current_pos) || current_pos
|
|
57
|
+
current_pos = offset + stripped.length
|
|
53
58
|
|
|
54
|
-
offset = text.index(chunk_text.strip) || 0
|
|
55
59
|
chunks << Chunk.new(
|
|
56
|
-
text:
|
|
60
|
+
text: raw_text,
|
|
57
61
|
index: chunks.size,
|
|
58
62
|
offset: offset,
|
|
59
63
|
metadata: metadata.merge(token_count: chunk_tokens.length)
|
data/lib/chunker_ruby/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: chunker-ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Johannes Dwi Cahyo
|
|
@@ -18,6 +18,7 @@ extensions: []
|
|
|
18
18
|
extra_rdoc_files: []
|
|
19
19
|
files:
|
|
20
20
|
- LICENSE
|
|
21
|
+
- README.md
|
|
21
22
|
- lib/chunker_ruby.rb
|
|
22
23
|
- lib/chunker_ruby/base_splitter.rb
|
|
23
24
|
- lib/chunker_ruby/character.rb
|